News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

RonyBC's Fireworks Program Fully Commented...almost!

Started by OceanJeff32, March 21, 2005, 06:26:48 AM

Previous topic - Next topic

OceanJeff32

; This code has been changed from the original listing on ronybc.com, especially the
; Blur_MMX2 procedure.
;The most Exploding, Apocalyptic ASM graphics code evurr...! Fireworks Fumes Silicon :)
;Win32asm Source Code:


; Fireworks - with MMX blur and light effects
; by ronybc from Kerala,INDIA
; website: http://www.ronybc.8k.com

.686p
.XMM
.model flat,stdcall
option casemap:none
include \masm32\include\windows.inc
include \masm32\include\kernel32.inc
include \masm32\include\gdi32.inc
include \masm32\include\user32.inc

includelib \masm32\lib\kernel32.lib
includelib \masm32\lib\gdi32.lib
includelib \masm32\lib\user32.lib

; Per-shell record layout, as used by the FShell_* procedures in this file:
;   +0            DWORD  life  (decremented every step; FireThread recycles the
;                               shell once it goes negative)
;   +EXX   (4)    DWORD  x of the explosion origin (written by FShell_recycle)
;   +EXY   (8)    DWORD  y of the explosion origin (written by FShell_recycle)
;   +AIR   (12)   float  air-resistance factor applied to velocities
;   +SPARC (16)   nd sparks of 16 bytes each: float x, xv, y, yv
; Size of one shell = nd*16 + SPARC bytes; FireThread computes it into `sb`.

EXX   EQU 4
EXY   EQU 8
AIR   EQU 12
SPARC EQU 16

.data
; Window class name, window caption and the text shown by the info message box.
ClassName db "apocalypse",0
AppName   db "Fireworks MMX ...by ronybc",0,0,0,0,0,0
info      db "Fireworks Version: 3.40229 - Freeware",13,10
          db  13,10
          db "WARNING: This is a Fireware, softwares that push CPU temperature",13,10
          db "to its maximum. It does No harm, but overclockers better stay away :)",13,10
          db "Entire source code of this program is free available at my website. ",13,10
          db  13,10
          db "If you like the work, help the author with donations.",13,10
          db "see http://www.ronybc.8k.com/support.htm",13,10
          db  13,10
          db "SPACE & ENTER keys toggles 'Gravity and Air' and",13,10
          db "'Light and Smoke' effects respectively.",13,10
          db "And clicks explode..! close clicks produce more light",13,10
          db  13,10
          db "Manufactured, bottled and distributed by",13,10
          db "Silicon Fumes Digital Distilleries, Kerala, INDIA",13,10
          db 13,10
          db "Copyright 1999-2004 © Rony B Chandran. All Rights Reserved",13,10
          db 13,10
          db "This isn't the Final Version",13,10
          db "check http://www.ronybc.8k.com for updates and more",0
seed      dd 2037280626        ; state of random(); the window handle is added at startup
wwidth    dd 800               ; initial window width passed to CreateWindowEx
wheight   dd 600               ; initial window height; smaller the window faster the fires
maxx      dd 123               ; frame width in pixels  (123 = placeholder, set in WM_SIZE)
maxy      dd 123               ; frame height in pixels (123 = placeholder, set in WM_SIZE)
lightx    dd 123               ; x of the light source (last explosion or Ctrl+mouse)
lighty    dd 123               ; y of the light source
flash     dd 123               ; light strength; multiplied by flfactor every frame
flfactor  dd 0.92              ; per-frame decay factor applied to flash
adg       dd 0.00024           ; 0.00096 acceleration due to gravity
xcut      dd 0.00064           ; sparks with |x velocity| <= xcut are no longer drawn
nb        dd 8                 ; number of shells
nd        dd 1200              ; sparks per shell
sb        dd 0                 ; size of one shell in bytes; value set on execution
maxpower  dd 7                 ; upper bound for the random explosion power (1..maxpower)
minlife   dd 50                ; life granularity; altered @WndProc:WM_COMMAND:1300/1310
motionQ   dd 2                 ; simulation steps per drawn frame, altered @WndProc:WM_COMMAND:1210
fcount    dd 0                 ; frames drawn since MoniThread last reset it
GMode     dd 1                 ; atmosphere or outer-space (toggled by Switch)
CMode     dd 0                 ; color shifter
EMode     dd 1                 ; special effects (light + blur) on/off
click     dd 0                 ; index of the shell the next mouse click will recycle
stop      dd 0                 ; set to 1 in WM_CLOSE to make both threads exit
fadelvl   dd 1                 ; amount subtracted from every colour channel per blur pass
whichone  dd 0                 ; phase counter for the rotating window title in MoniThread
; Colour table used by FShell_render when CMode is 0; indexed by (shell number - 1).
chemtable dd 00e0a0ffh, 00f08030h, 00e6c080h, 0040b070h,  00aad580h, \
             00a0e0ffh, 003080f0h, 0086c0e0h, 0070b040h,  008ad5a0h, \
             0010a0f2h, 00403030h, 0056c060h, 0090b0a0h,  00fad5c0h
; DIB header: biSize=40, biPlanes=1, biBitCount=24; width/height are filled in by WM_SIZE.
bminf     BITMAPINFO <<40,0,0,1,24,0,0,0,0,0,0>>

.data?
hInstance HINSTANCE ?          ; module handle from GetModuleHandle
hwnd      LPVOID ?             ; main window handle (result of CreateWindowEx)
hmnu      HWND ?               ; menu handle (result of LoadMenu)
wnddc     HDC ?                ; window DC obtained by FireThread
hFThread  HANDLE ?             ; handle of the FireThread thread (used by Suspend/ResumeThread)
hHeap     HANDLE ?             ; process heap used for the frame buffers and shell array
idThread1 DWORD ?              ; receives MoniThread's id at creation; FireThread stores -1 on exit
idThread2 DWORD ?              ; receives FireThread's id at creation; MoniThread stores -1 on exit
bitmap1   LPVOID ?             ; frame buffer the sparks are drawn into (swapped by Blur_MMX2)
bitmap2   LPVOID ?             ; second frame buffer
hFShells  LPVOID ?             ; array of nb shell records, sb bytes each
msg       MSG <>               ; message-loop scratch
wc        WNDCLASSEX <>        ; window class description filled in at start

.code

; random - Park-Miller "minimal standard" generator (a = 16807, m = 2^31-1),
;          evaluated with Schrage's method (q = 127773, r = 2836).
;   base    : exclusive upper bound of the result
;   returns : eax = next state mod base, i.e. a value in 0..base-1;
;             eax = 0 when base is 0 (instead of faulting in DIV)
; Advances the global `seed`. Clobbers eax, ecx, edx and flags only; callers in
; this file rely on ebx, esi and edi surviving a call.
random PROC base:DWORD
    mov eax, seed
    xor edx, edx
    mov ecx, 127773
    div ecx                    ; eax = seed / q, edx = seed mod q
    mov ecx, eax               ; ecx = seed / q
    mov eax, 16807
    mul edx                    ; eax = a * (seed mod q)
    mov edx, ecx
    mov ecx, eax
    mov eax, 2836
    mul edx                    ; eax = r * (seed / q)
    sub ecx, eax               ; ecx = a*(seed mod q) - r*(seed / q)
    jns @F
    add ecx, 7FFFFFFFh         ; Schrage: a negative difference wraps by adding m
@@:
    mov seed, ecx
    xor eax, eax               ; result for the base == 0 case
    cmp base, 0
    je  random_done            ; avoid a divide-by-zero fault
    mov eax, ecx
    xor edx, edx
    div base
    mov eax, edx               ; remainder = value in 0..base-1
random_done:
    ret
random ENDP
; -------------------------------------------------------------------------
; Light_Flash3 - add a radial glow centred on (x1,y1) to a 24-bit frame.
;   x1, y1 : centre of the light in pixels
;   lum    : light strength; halved on entry and then used as the radius in pixels
;   src    : frame to read
;   des    : frame that receives src plus the light term (byte-wise saturated add)
; The frame is processed in 2x2 pixel cells: one brightness value is computed per
; cell from the distance of its top-left pixel to the centre and added to all
; four pixels. Brightness = 255 * (1 - dist/lum)^4 inside the radius, 0 outside.
; No return value is set. Uses eax, ebx, ecx, edx, esi, edi, MM0-MM3 and the x87
; stack without saving them.
; NOTE(review): both loops are do-while with `jbe`, so x2 can reach maxx and
; y2+1 can reach maxy; the cells touched there lie past the visible frame --
; confirm the buffers always have that slack.
Light_Flash3 PROC x1:DWORD, y1:DWORD, lum:DWORD, src:DWORD, des:DWORD
    LOCAL mx:DWORD, my:DWORD, x2:DWORD, y2:DWORD, tff:DWORD
    mov eax,lum
    shr eax,1                  ; Light_Flash: dynamic 2D lighting routine
    mov lum,eax                ; does not uses any pre-computed data
    mov tff,255                ; maximum brightness, used as integer multiplier below
    mov eax,maxx
    mov mx,eax                 ; mx = last x2 value the inner loop accepts
    mov eax,maxy
    dec eax
    mov my,eax                 ; my = last y2 value the outer loop accepts
    mov esi,src
    mov edi,des
    xor eax,eax
    mov y2,eax                 ; y2 = 0
ylp3:                          ; 2x2 instead of per pixel lighting
    xor eax,eax                ; half the quality, but higher speed
    mov x2,eax                 ; x2 = 0
xlp3:
    mov eax,y2
    sub eax,y1
    imul eax                   ; edx:eax = (y2-y1)^2 (one-operand form)
    mov ebx,x2
    sub ebx,x1
    imul ebx,ebx               ; ebx = (x2-x1)^2
    add eax,ebx                ; eax = squared distance to the light centre
    mov edx,lum
    imul edx,edx               ; edx = radius^2
    xor ebx,ebx                ; default brightness 0 for cells outside the radius
    cmp eax,edx
    ja @F                      ; outside: skip the FPU work (jump to end causes time waves)
    push eax                   ; use the stack slot as FPU load/store scratch
    fild dword ptr[esp]
    fsqrt                      ; st0 = dist
    fidiv lum                  ; st0 = dist / radius (this code is -nonlinear-)
    fld1
    fsubrp st(1),st(0)         ; st0 = 1 - dist/radius
    fmul st(0),st(0)           ; curve
    fmul st(0),st(0)           ; curve more: (1 - dist/radius)^4
    fimul tff                  ; scale to 0..255
    fistp dword ptr[esp]

    pop ebx                    ; ebx = brightness as integer
    imul ebx,01010101h         ; replicate the byte into all four bytes of ebx
@@:
    mov eax,y2
    imul maxx
    add eax,x2
    lea eax,[eax+eax*2]        ; eax = byte offset of pixel (x2,y2)
    mov edx,maxx
    lea edx,[edx+edx*2]
    add edx,eax                ; edx = byte offset of pixel (x2,y2+1)
   
    movd MM2,ebx               ; simply add with saturation
    movq MM0,[esi+eax]         ; 8 source bytes of row y2 (gamma correction is against this code)
    psllq MM2,32
    movq MM1,[esi+edx]         ; 8 source bytes of row y2+1
    movd MM3,ebx
    por MM2,MM3                ; MM2 = brightness byte in all 8 lanes
    paddusb MM0,MM2
    movd [edi+eax],MM0         ; store bytes 0..3 of row y2
    paddusb MM1,MM2
    psrlq MM0,32
    movd [edi+edx],MM1         ; store bytes 0..3 of row y2+1
    movd ebx,MM0
    psrlq MM1,32
    mov [edi+eax+4],bx         ; store bytes 4..5 of row y2   (6 bytes = 2 pixels)
    movd ecx,MM1
    mov [edi+edx+4],cx         ; store bytes 4..5 of row y2+1
    emms                       ; leave MMX state so the next cell can use the x87 again
@@:                            ; not targeted by any jump in this procedure
    mov eax,x2
    add eax,2
    mov x2,eax
    cmp eax,mx
    jbe xlp3                   ; next cell in this row pair
    mov eax,y2
    add eax,2
    mov y2,eax
    cmp eax,my
    jbe ylp3                   ; next row pair
    ret
Light_Flash3 ENDP
; -------------------------------------------------------------------------
; Blur_MMX2 - one blur-and-fade pass over the 24-bit frame (uses PAVGW).
; Swaps bitmap1 and bitmap2, then writes into the new bitmap1, for every colour
; byte, the rounded average of its left, right, upper and lower neighbours in
; the old bitmap1, minus fadelvl (saturating at 0).
; Takes no arguments and sets no return value. Uses eax, ebx, edx, esi, edi and
; MM0-MM3, MM6, MM7 without saving them; ends with EMMS so x87 code may follow.
; The fade constant is built in registers: this PROC has no stack frame, so
; ebp still belongs to the caller and must not be used as scratch space.
; NOTE(review): the loop reads one row before and one row after the frame and
; runs up to and including offset maxx*maxy*3, so the buffers need slack on
; both ends (FireThread offsets them by 4096 bytes) -- confirm for wide frames.
Blur_MMX2 PROC                 ; 24bit color version
    mov edi,bitmap2            ; edi = destination (becomes the new bitmap1)
    mov esi,bitmap1            ; esi = source      (becomes the new bitmap2)
    mov bitmap1,edi
    mov bitmap2,esi

    pxor MM7,MM7               ; MM7 = 0, used to widen bytes to words and to repack
    mov eax,fadelvl
    imul eax,00010001h         ; fadelvl in both 16-bit halves of eax
    movd MM6,eax
    punpckldq MM6,MM6          ; MM6 = fadelvl in all four words

    mov eax,maxx
    lea eax,[eax+eax*2]        ; eax = maxx*3 = bytes per row
    mov ebx,eax
    imul maxy                  ; eax = maxx*3*maxy = bytes per frame
    push eax                   ; [esp] = end offset for the loop compare
    lea edx,[ebx-3]
    lea ebx,[ebx+3]            ; ebx = row + 3 : from esi-3 this reaches the row below
    neg edx                    ; edx = 3 - row : from esi-3 this reaches the row above
    xor eax,eax                ; eax = byte offset of the current pixel
    lea esi,[esi-3]            ; bias the source pointer one pixel to the left
@@:
    movd MM0,[esi]             ; left neighbour
    movd MM1,[esi+6]           ; right neighbour
    movd MM2,[esi+ebx]         ; neighbour one row further on  (src + row)
    movd MM3,[esi+edx]         ; neighbour one row earlier     (src - row)

    punpcklbw MM0,MM7          ; widen the 4 loaded bytes to 4 words each
    punpcklbw MM1,MM7
    punpcklbw MM2,MM7
    punpcklbw MM3,MM7

    pavgw   MM0, MM1           ; rounded average of left/right
    pavgw   MM2, MM3           ; rounded average of the two vertical neighbours
    pavgw   MM0, MM2           ; average of the two averages

    psubusw MM0,MM6            ; fade

    packuswb MM0,MM7           ; back to bytes

    movd [edi+eax],MM0         ; writes 4 bytes; the 4th is overwritten by the next pixel

    lea esi,[esi+3]            ; advance one 24-bit pixel
    lea eax,[eax+3]
    cmp eax,[esp]
    jbe @B                     ; loop while offset <= frame size
    pop eax                    ; drop the end offset
    emms                       ; free fpu registers for following
    ret                        ; floating-point functions
Blur_MMX2 ENDP
; -------------------------------------------------------------------------
; FShell_explodeOS - advance one shell by a step in "outer space" mode:
; every spark moves by its velocity, velocities stay unchanged.
;   hb      : pointer to the shell record
;   returns : eax = the shell's life after decrementing it
; Leaves edi pointing at the first spark (hb + SPARC).
FShell_explodeOS PROC hb:DWORD
    mov edi,hb
    add edi,SPARC              ; edi -> spark array; the header is at [edi-SPARC]
    mov eax,nd
    shl eax,4
    sub eax,16                 ; eax = byte offset of the last spark
@@:
    fld dword ptr[edi+eax+8]   ; y += yv
    fadd dword ptr[edi+eax+12]
    fstp dword ptr[edi+eax+8]
    fld dword ptr[edi+eax]     ; x += xv
    fadd dword ptr[edi+eax+4]
    fstp dword ptr[edi+eax]
    sub eax,16                 ; previous spark; borrow means offset 0 was done
    jnc @B
    dec dword ptr[edi-SPARC]   ; --life
    mov eax,[edi-SPARC]
    ret
FShell_explodeOS ENDP
; -------------------------------------------------------------------------
; FShell_explodeAG - advance one shell by a step with air drag and gravity:
; both velocity components are scaled by the shell's AIR factor, adg is added
; to the y velocity, then the position moves by the new velocity.
;   hb      : pointer to the shell record
;   returns : eax = the shell's life after decrementing it
; Leaves edi pointing at the first spark (hb + SPARC); the two x87 constants
; loaded at the top are popped again before returning.
FShell_explodeAG PROC hb:DWORD
    mov edi,hb
    fld adg                    ; becomes st(2) inside the loop: gravity
    fld dword ptr[edi+AIR]     ; becomes st(1) inside the loop: air factor
    add edi,SPARC              ; edi -> spark array; the header is at [edi-SPARC]
    mov eax,nd
    shl eax,4
    sub eax,16                 ; eax = byte offset of the last spark
@@:
    fld dword ptr[edi+eax+12]  ; yv
    fmul st(0),st(1)           ;   * air
    fadd st(0),st(2)           ;   + gravity
    fst dword ptr[edi+eax+12]
    fadd dword ptr[edi+eax+8]  ; y += new yv
    fstp dword ptr[edi+eax+8]
    fld dword ptr[edi+eax+4]   ; xv
    fmul st(0),st(1)           ;   * air
    fst dword ptr[edi+eax+4]
    fadd dword ptr[edi+eax]    ; x += new xv
    fstp dword ptr[edi+eax]
    sub eax,16                 ; previous spark; borrow means offset 0 was done
    jnc @B
    fcompp                     ; pop the air factor and gravity
    dec dword ptr[edi-SPARC]   ; --life
    mov eax,[edi-SPARC]
    ret
FShell_explodeAG ENDP
; -------------------------------------------------------------------------
; FShell_render - plot every still-burning spark of one shell into bitmap1.
;   hb    : pointer to the shell record
;   color : 1-based shell number (FireThread passes its countdown nb..1); it
;           selects the colour, it is not an RGB value itself
; Colour source: chemtable[color-1] when CMode is 0, otherwise the dword at
; hFShells+32+(color-1)*4, i.e. live spark floats reinterpreted as colours.
; A spark is skipped when |x velocity| <= xcut (burnt out) or when its rounded
; position is outside 0..maxx-1 / 0..maxy-1 (unsigned compare, so negative
; coordinates are rejected too).
; No return value is set. Uses eax, ebx, ecx, edx, esi, edi and the x87 stack
; top without saving them. All scratch values live in LOCALs, never below esp.
FShell_render PROC hb:DWORD, color:DWORD
    LOCAL ix:DWORD, iy:DWORD        ; spark position converted to integers
    LOCAL limx:DWORD, limy:DWORD    ; maxx / maxy sampled once for the whole call
    LOCAL rgb:DWORD                 ; colour to plot (low 24 bits used)

    mov edi,hb
    add edi,SPARC              ; edi -> spark array of this shell

    mov ebx,color
    dec ebx                    ; zero-based colour index
    mov ecx,offset chemtable
    mov edx,hFShells           ; floats are beautiful, and cheap source of
    add edx,32                 ; the chemical used for multi colored fires
    mov eax,CMode
    test eax,eax
    cmovz edx,ecx              ; CMode == 0: take the colour from chemtable
    mov edx,[edx+ebx*4]
    mov rgb,edx

    mov eax,maxx               ; sample the frame size once; WM_SIZE may change
    mov limx,eax               ; maxx/maxy from the window thread at any time
    mov eax,maxy
    mov limy,eax

    mov ecx,nd
    dec ecx
    shl ecx,4                  ; ecx = byte offset of the last spark
    mov esi,bitmap1

@@:
    fld dword ptr[edi+ecx+4]   ; st0 = x velocity
    fabs
    fld xcut                   ; st0 = xcut, st1 = |xv|
    fcomip st(0),st(1)         ; flags = compare(xcut, |xv|); pops xcut
    fstp st(0)                 ; pop |xv| (leaves EFLAGS untouched)
    jae forget                 ; xcut >= |xv| : spark has burnt out

    fld dword ptr[edi+ecx]
    fistp ix                   ; ix = (int) x
    fld dword ptr[edi+ecx+8]
    fistp iy                   ; iy = (int) y
    mov eax,iy
    cmp eax,limy
    jae forget                 ; y outside the frame
    mov ebx,ix
    cmp ebx,limx
    jae forget                 ; x outside the frame
    imul limx                  ; eax = iy * width
    add eax,ebx
    lea eax,[eax+eax*2]        ; eax = byte offset of the pixel (3 bytes per pixel)
    mov edx,rgb

    mov [esi+eax],dx           ; low two colour bytes
    shr edx,16
    mov [esi+eax+2],dl         ; third colour byte

forget:
    sub ecx,16                 ; previous spark; borrow means offset 0 was done
    jnc @B

    ret
FShell_render ENDP
; -------------------------------------------------------------------------
; FShell_recycle - restart one shell as a fresh explosion at (x,y).
;   hb : pointer to the shell record
;   x,y: explosion origin in pixels (integers)
; Effects:
;   * stores x,y in the shell header and in lightx/lighty, adds 3200 to flash
;   * life = (old life mod minlife) + random(1..20)*minlife, which keeps
;     explosions started by mouse clicks in step with the others
;   * AIR  = 1 - random(10..39)/10000
;   * every spark: position = (x,y); xv = u*power;
;     yv = sqrt(power^2 - xv^2) * v, with u,v random in [-1,1) and
;     power = random(1..maxpower)
; No return value is set. Uses eax, ebx, ecx, edx, edi and four x87 registers
; without saving them; the x87 stack is balanced on exit. Integer<->FPU
; transfers go through LOCALs instead of memory below esp.
FShell_recycle PROC hb:DWORD, x:DWORD, y:DWORD
    LOCAL inum:DWORD, iden:DWORD    ; integer scratch for FILD/FIDIV

    mov edi,hb
    mov eax,x
    mov [edi+EXX],eax
    mov lightx,eax             ; the light follows the most recent explosion
    mov eax,y
    mov [edi+EXY],eax
    mov lighty,eax

    mov eax,flash              ; having only one light source
    add eax,3200               ; add on top if the previous flash has not faded yet
    mov flash,eax

    invoke random,20
    inc eax                    ; 1..20
    imul minlife
    mov ebx,eax                ; ebx = random multiple of minlife
    mov eax,[edi]              ; old life (may be negative, read as unsigned here)
    xor edx,edx
    div minlife                ; unsigned divide to match the zero-extended dividend
    add edx,ebx                ; remainder + random multiple
    mov [edi],edx              ; new life

    invoke random,30
    add eax,10                 ; 10..39
    mov inum,eax
    mov iden,10000
    fld1
    fild inum
    fidiv iden                 ; st0 = n/10000, st1 = 1
    fsubp st(1),st(0)          ; st0 = 1 - n/10000
    fstp dword ptr[edi+AIR]
    add edi,SPARC              ; edi -> spark array (skip the 16-byte header)

    fild y
    fild x
    mov inum,1000
    fild inum                  ; st0 = 1000, st1 = x, st2 = y
    invoke random,maxpower
    inc eax                    ; 1..maxpower
    mov inum,eax
    fild inum                  ; st0 = power, st1 = 1000, st2 = x, st3 = y

    mov ecx,nd
    dec ecx
    shl ecx,4                  ; ecx = byte offset of the last spark

; Loop invariant on the x87 stack: st0 = power, st1 = 1000, st2 = x, st3 = y.
@@:
    push ecx                   ; random clobbers ecx
    invoke random,2000
    mov inum,eax
    fild inum                  ; st0 = r in 0..1999, constants move down by one
    fsub st(0),st(2)           ; r - 1000
    fdiv st(0),st(2)           ; u = (r - 1000) / 1000
    fmul st(0),st(1)           ; xv = u * power
    mov ecx,[esp]
    fstp dword ptr[edi+ecx+4]  ; store xv

    fld st(0)
    fmul st(0),st(0)           ; power^2
    fld dword ptr[edi+ecx+4]
    fmul st(0),st(0)           ; xv^2
    fsubp st(1),st(0)          ; power^2 - xv^2
    fsqrt                      ; largest |yv| that keeps the speed <= power

    invoke random,2000
    mov inum,eax
    fild inum
    fsub st(0),st(3)
    fdiv st(0),st(3)           ; v = (r - 1000) / 1000
    fmulp st(1),st(0)          ; yv = sqrt(power^2 - xv^2) * v

    mov ecx,[esp]
    fstp dword ptr[edi+ecx+12] ; store yv
    fld st(2)
    fstp dword ptr[edi+ecx]    ; spark x = explosion x
    fld st(3)
    fstp dword ptr[edi+ecx+8]  ; spark y = explosion y
    pop ecx
    sub ecx,16                 ; previous spark; borrow means offset 0 was done
    jnc @B
    fcompp                     ; pop power and 1000
    fcompp                     ; pop x and y
    ret
FShell_recycle ENDP
; -------------------------------------------------------------------------

; FireThread - thread entry point that simulates and draws the fireworks until
; `stop` becomes non-zero. It never returns; it ends through ExitThread.
;
; Frame (set up by hand because this is a bare label, not a PROC):
;   [ebp]    shells still to process in the current pass
;   [ebp+4]  pointer to the shell being processed
;   [ebp+8]  simulation steps left before the next frame is drawn (motionQ)
;   [ebp-8]..[ebp-1] stay reserved as scratch below the locals
; Each frame buffer is allocated with BITMAP_GUARD bytes of slack in front
; because the blur reads one row before the frame start.
; NOTE(review): the buffers have a fixed size; maxx*maxy*3 must stay below
; BITMAP_BYTES - BITMAP_GUARD, which WM_SIZE does not enforce -- confirm for
; large windows.
BITMAP_BYTES EQU 4194304       ; bytes allocated per frame buffer
BITMAP_GUARD EQU 4096          ; slack kept in front of each frame buffer

FireThread:
    sub esp,20                 ; 12 bytes of locals + 8 bytes of scratch, on our own stack
    lea ebp,[esp+8]

    invoke GetCurrentThread    ; SetThreadPriority needs a handle, not a thread id
    invoke SetThreadPriority,eax,THREAD_PRIORITY_HIGHEST
    invoke GetDC,hwnd
    mov wnddc,eax
    invoke GetProcessHeap
    mov hHeap,eax
    invoke HeapAlloc,hHeap,HEAP_ZERO_MEMORY,BITMAP_BYTES
    add eax,BITMAP_GUARD       ; blur: -1'th line problem
    mov bitmap1,eax
    invoke HeapAlloc,hHeap,HEAP_ZERO_MEMORY,BITMAP_BYTES
    add eax,BITMAP_GUARD       ; blur: -1'th line problem
    mov bitmap2,eax

    mov eax,nd
    shl eax,4
    add eax,SPARC
    mov sb,eax                 ; size of one shell = nd*16 + SPARC
    imul nb                    ; eax = size of the whole shell array

    invoke HeapAlloc,hHeap,HEAP_ZERO_MEMORY,eax
    mov hFShells,eax

    finit                      ; initialise floating point unit
    push 07fh                  ; low precision floats, all exceptions masked
    fldcw word ptr[esp]        ; fireworks... not space rockets
    add esp,4

lp1:
    mov eax,motionQ
    mov dword ptr[ebp+8],eax   ; simulation steps for this frame

lp2:
    mov eax,nb
    mov [ebp],eax              ; shells to process in this pass
    mov eax,hFShells
    mov [ebp+4],eax            ; start with the first shell

lp3:
    invoke FShell_render,[ebp+4],[ebp]

    push [ebp+4]               ; always the outer-space step; GMode is not consulted
    call FShell_explodeOS      ; (OceanJeff32's choice)

    test eax,eax
    jns @F                     ; life still >= 0: keep the shell

    invoke random,maxy         ; dead shell: restart it at a random on-screen spot
    push eax                   ; y
    invoke random, maxx
    push eax                   ; x
    push [ebp+4]               ; hb
    call FShell_recycle
@@:
    mov eax,sb
    add [ebp+4],eax            ; next shell
    dec dword ptr[ebp]
    jnz lp3
    dec dword ptr[ebp+8]
    jnz lp2                    ; another simulation step before drawing

    mov eax,EMode
    test eax,eax
    jz r1                      ; effects off: plain blit and clear
    mov eax,CMode              ; switch pre/post blur according to -
    test eax,eax               ; current chemical in fire
    jz @F
    invoke Blur_MMX2
@@:
    invoke Light_Flash3,lightx,lighty,flash,bitmap1,bitmap2
    invoke SetDIBitsToDevice,wnddc,0,0,maxx,maxy,\
           0,0,0,maxy,bitmap2,ADDR bminf,DIB_RGB_COLORS
    mov eax,CMode
    test eax,eax
    jnz r2
    invoke Blur_MMX2
    jmp r2
r1:
    invoke SetDIBitsToDevice,wnddc,0,0,maxx,maxy,\
           0,0,0,maxy,bitmap1,ADDR bminf,DIB_RGB_COLORS
    mov eax,maxx
    imul maxy
    lea eax,[eax+eax*2]        ; frame size in bytes
    invoke RtlZeroMemory,bitmap1,eax
r2:
    inc fcount                 ; count the frames
    fild flash
    fmul flfactor              ; let the flash fade
    fistp flash
;    invoke Sleep,5             ; control, if frames rate goes too high
    mov eax,stop
    test eax,eax
    jz lp1

    invoke ReleaseDC,hwnd,wnddc
    mov eax,bitmap1
    sub eax,BITMAP_GUARD       ; HeapFree needs the pointer HeapAlloc returned
    invoke HeapFree,hHeap,0,eax
    mov eax,bitmap2
    sub eax,BITMAP_GUARD
    invoke HeapFree,hHeap,0,eax
    invoke HeapFree,hHeap,0,hFShells
    mov idThread1,-1           ; tell the main thread that this thread is done
    invoke ExitThread,2003
    hlt                        ; ...! i8085 memories
; -------------------------------------------------------------------------
.data
; Window-title buffer and the wsprintf formats MoniThread rotates through.
; Each format takes one argument: the frame count of the last second.
fps  db 64 dup (0)
fmat db "fps = %u   [www.ronybc.8k.com]",0
fmat2 db "fps = %u   [www.ronybc.8k.com] MMX",0
fmat3 db "fps = %u   [www.ronybc.8k.com] SSE",0
fmat4 db "fps = %u   [www.ronybc.8k.com] SSE2",0
fmat5 db "fps = %u   [www.ronybc.8k.com] Hyper-T",0
fmat6 db "fps = %u   [www.ronybc.8k.com] x87",0
fmat7 db "fps = %u   [www.ronybc.8k.com] RDTSC",0
.code

; MoniThread - thread entry point that updates the window title once per second
; until `stop` becomes non-zero. It never returns; it ends through ExitThread.
;
; `whichone` cycles 1..7 and wraps to 0 at 8:
;   wrap -> plain fps line        2 -> MMX     3 -> SSE     4 -> SSE2
;   5    -> Hyper-T               6 -> x87     7 -> RDTSC   1 -> title unchanged
; A feature line is shown only when the matching CPUID.1:EDX bit is set.
; fcount is reset every second whether or not the title was updated.
; esi/edi carry the mask and format across CPUID, which overwrites
; eax, ebx, ecx and edx.
CPUF_FPU  EQU 00000001h        ; CPUID.1:EDX bit 0  - x87 FPU on chip
CPUF_TSC  EQU 00000010h        ; CPUID.1:EDX bit 4  - time stamp counter (RDTSC)
CPUF_MMX  EQU 00800000h        ; CPUID.1:EDX bit 23 - MMX
CPUF_SSE  EQU 02000000h        ; CPUID.1:EDX bit 25 - SSE
CPUF_SSE2 EQU 04000000h        ; CPUID.1:EDX bit 26 - SSE2
CPUF_HTT  EQU 10000000h        ; CPUID.1:EDX bit 28 - HTT (multi-threading capable package)

MoniThread:
    invoke Sleep,1000
    add whichone,1
    cmp whichone,8             ; phases 1..7 are feature slots, 8 wraps around
    jl moni_pick
    mov whichone,0
    invoke wsprintf,ADDR fps,ADDR fmat,fcount
    invoke SetWindowText,hwnd,ADDR fps
    jmp moni_done

moni_pick:
    cmp whichone,2
    jne moni_not2
    mov esi,CPUF_MMX
    mov edi,OFFSET fmat2
    jmp moni_probe
moni_not2:
    cmp whichone,3
    jne moni_not3
    mov esi,CPUF_SSE
    mov edi,OFFSET fmat3
    jmp moni_probe
moni_not3:
    cmp whichone,4
    jne moni_not4
    mov esi,CPUF_SSE2
    mov edi,OFFSET fmat4
    jmp moni_probe
moni_not4:
    cmp whichone,5
    jne moni_not5
    mov esi,CPUF_HTT
    mov edi,OFFSET fmat5
    jmp moni_probe
moni_not5:
    cmp whichone,6
    jne moni_not6
    mov esi,CPUF_FPU
    mov edi,OFFSET fmat6
    jmp moni_probe
moni_not6:
    cmp whichone,7
    jne moni_done              ; phase 1: leave the title as it is
    mov esi,CPUF_TSC
    mov edi,OFFSET fmat7

moni_probe:
    mov eax,1                  ; CPUID leaf 1: feature flags in edx
    cpuid
    test edx,esi
    jz moni_done               ; feature absent: keep the previous title
    invoke wsprintf,ADDR fps,edi,fcount
    invoke SetWindowText,hwnd,ADDR fps

moni_done:
    xor eax,eax
    mov fcount,eax             ; start counting the next second
    mov eax,stop
    test eax,eax
    jz MoniThread
    mov idThread2,-1           ; tell the main thread that this thread is done
    invoke ExitThread,2003
; -------------------------------------------------------------------------
; Switch - toggle a 0/1 mode variable and mirror it in a menu check mark.
;   oMode   : address of the DWORD mode variable; only its low byte is toggled
;             (zero becomes 1, anything else becomes 0)
;   iid     : menu command id whose check mark is updated in hmnu
;   returns : eax = result of CheckMenuItem
; Uses only eax, ecx and edx, so it is safe to call from the window procedure
; (ebx, esi and edi are left untouched).
Switch PROC oMode:DWORD, iid:DWORD
    mov edx,oMode
    cmp byte ptr [edx],0
    sete byte ptr [edx]        ; 0 -> 1, non-zero -> 0
    mov eax,[edx]              ; new value of the whole dword
    shl eax,3                  ; 1 -> 8, which is MF_CHECKED
    and eax,MF_CHECKED
    or eax,MF_BYCOMMAND
    invoke CheckMenuItem,hmnu,iid,eax
    ret
Switch ENDP
; -------------------------------------------------------------------------
; WndProc - window procedure of the fireworks window.
; Handled messages:
;   WM_MOUSEMOVE with only Ctrl held : move the light source to the cursor
;   WM_SIZE (not minimized)          : new frame size; width rounded down to a
;                                      multiple of 4, DIB header updated
;                                      (negative height = top-down bitmap)
;   WM_KEYDOWN SPACE / ENTER         : toggle GMode / EMode
;   WM_RBUTTONDOWN                   : show the info box
;   WM_LBUTTONDOWN                   : explode the next shell at the cursor
;   WM_CLOSE                         : stop the threads and destroy the window
;   WM_COMMAND                       : menu commands (ids 1000..1400)
; Returns 0 for handled messages, otherwise the DefWindowProc result.
; As a Win32 callback it must preserve ebx, esi and edi; the body and
; FShell_recycle overwrite ebx and edi, hence the USES clause.
WndProc PROC USES ebx esi edi hWnd:HWND, uMsg:UINT, wParam:WPARAM, lParam:LPARAM
    .IF uMsg==WM_MOUSEMOVE && wParam==MK_CONTROL
        xor edx,edx
        mov flash,2400
        mov eax,lParam
        mov dx,ax                   ; edx = x (low word)
        shr eax,16                  ; eax = y (high word)
        mov lightx,edx
        mov lighty,eax
    .ELSEIF uMsg==WM_SIZE && wParam!=SIZE_MINIMIZED
        xor edx,edx
        mov eax,lParam
        mov dx,ax                   ; edx = client width
        shr eax,16                  ; eax = client height
        shr edx,2
        shl edx,2                   ; width rounded down to a multiple of 4
        mov maxx,edx
        mov maxy,eax
        mov bminf.bmiHeader.biWidth,edx
        neg eax          ; -maxy
        mov bminf.bmiHeader.biHeight,eax
    .ELSEIF uMsg==WM_KEYDOWN && wParam==VK_SPACE
        invoke Switch,OFFSET GMode,1200
    .ELSEIF uMsg==WM_KEYDOWN && wParam==VK_RETURN
        invoke Switch,OFFSET EMode,1220
        mov flash,0
    .ELSEIF uMsg==WM_RBUTTONDOWN
        invoke MessageBox,hWnd,ADDR info,ADDR AppName,MB_OK or MB_ICONASTERISK
    .ELSEIF uMsg==WM_LBUTTONDOWN
        xor edx,edx
        mov eax,lParam
        mov dx,ax
        shr eax,16
        push eax                    ; y argument for FShell_recycle
        push edx                    ; x argument
        mov edx,nb
        dec edx
        mov eax,click
        dec eax
        cmovs eax,edx               ; wrap below 0 to the last shell
        mov click,eax
        imul sb
        add eax,hFShells            ; address of shell number `click`
        push eax                    ; hb argument
        call FShell_recycle
    .ELSEIF uMsg==WM_CLOSE
        mov stop,1                  ; stop running threads
        invoke Sleep,100            ; avoid FireThread drawing without window
        invoke DestroyWindow,hwnd
        invoke PostQuitMessage,0
    .ELSEIF uMsg==WM_COMMAND
       .IF wParam==1010
        invoke SendMessage,hwnd,WM_CLOSE,0,0
       .ELSEIF wParam==1000
        invoke SuspendThread,hFThread ; suffering technical difiiculties :)
        mov eax,maxx                  ; major motiv - to see ZeroMem in acion
        imul maxy
        lea esi,[eax+eax*2]           ; frame size; esi survives the API calls, eax does not
        invoke RtlZeroMemory,bitmap1,esi ; this thing is fast,
        invoke RtlZeroMemory,bitmap2,esi ; but hidden from some API docs
        push nb                       ; [esp+4] = shells left
        push hFShells                 ; [esp]   = current shell pointer
    @@:
        mov eax,maxx
       ;shr eax,1
         shr eax,2
         mov edx,[esp+4]
         dec edx
         imul eax,edx                 ; x = (maxx/4) * (shells left - 1)
        mov ebx,maxy
        shr ebx,1                     ; y = maxy/2
        ; invoke has pushed two arguments by the time it reads [esp+8],
        ; so that operand is the shell pointer stored at [esp] above.
        invoke FShell_recycle,[esp+8],eax,ebx
        mov eax,sb
        add [esp],eax                 ; next shell
        dec dword ptr[esp+4]
        jnz @B
        ;mov flash,6400
        invoke ResumeThread,hFThread
        pop eax                       ; drop the two loop variables
        pop eax
       .ELSEIF wParam==1200
        invoke Switch,OFFSET GMode,1200
       .ELSEIF wParam==1210
        invoke Switch,OFFSET CMode,1210
        mov ecx,CMode
        mov eax,16
        shr eax,cl
        mov motionQ,eax        ; 16 steps per frame when CMode is 0, 8 when it is 1
       .ELSEIF wParam==1220
        invoke Switch,OFFSET EMode,1220
        mov flash,0
       .ELSEIF wParam==1300
        invoke CheckMenuItem,hmnu,1310,MF_BYCOMMAND or MF_UNCHECKED
        invoke CheckMenuItem,hmnu,1300,MF_BYCOMMAND or MF_CHECKED
        mov minlife,500        ; long interval between shoots
       .ELSEIF wParam==1310
        invoke CheckMenuItem,hmnu,1300,MF_BYCOMMAND or MF_UNCHECKED
        invoke CheckMenuItem,hmnu,1310,MF_BYCOMMAND or MF_CHECKED
        mov minlife,100        ; short interval
       .ELSEIF wParam==1400
        invoke MessageBox,hWnd,ADDR info,ADDR AppName,MB_OK or MB_ICONASTERISK
       .ENDIF
    .ELSE
        invoke DefWindowProc,hWnd,uMsg,wParam,lParam
        ret
    .ENDIF
    xor eax,eax
    ret
WndProc ENDP
; -------------------------------------------------------------------------

; start - program entry point: registers the window class, creates the window
; and its menu, starts MoniThread and FireThread, runs the message loop and,
; after WM_QUIT, waits for both worker threads before exiting the process.
start:
    invoke GetModuleHandle,NULL
    mov hInstance,eax
    mov wc.hInstance,eax
    mov wc.cbSize,SIZEOF WNDCLASSEX
    mov wc.style,CS_HREDRAW or CS_VREDRAW or CS_BYTEALIGNCLIENT
    mov wc.lpfnWndProc,OFFSET WndProc
    mov wc.cbClsExtra,NULL
    mov wc.cbWndExtra,NULL
    mov wc.hbrBackground,COLOR_MENUTEXT
    mov wc.lpszMenuName,NULL
    mov wc.lpszClassName,OFFSET ClassName
    invoke LoadCursor,NULL,IDC_ARROW
    mov wc.hCursor,eax
    invoke LoadIcon,hInstance,500          ; icon resource 500
    mov wc.hIcon,eax
    mov wc.hIconSm,eax
    invoke RegisterClassEx,ADDR wc
    invoke CreateWindowEx,WS_EX_OVERLAPPEDWINDOW,ADDR ClassName,ADDR AppName,\
                          WS_OVERLAPPEDWINDOW,CW_USEDEFAULT,\
                          CW_USEDEFAULT,wwidth,wheight,NULL,NULL,\
                          hInstance,NULL
    mov hwnd,eax
    add seed,eax          ; vary the random sequence with the window handle ;)
    invoke LoadMenu,hInstance,600          ; menu resource 600
    mov hmnu,eax
    invoke SetMenu,hwnd,eax
    invoke CheckMenuItem,hmnu,1200,MF_BYCOMMAND or MF_CHECKED
    invoke CheckMenuItem,hmnu,1220,MF_BYCOMMAND or MF_CHECKED
    invoke CheckMenuItem,hmnu,1300,MF_BYCOMMAND or MF_CHECKED
    invoke ShowWindow,hwnd,SW_SHOWNORMAL
    invoke UpdateWindow,hwnd
    ; Note the crossed bookkeeping: idThread1 receives MoniThread's id but is
    ; set to -1 by FireThread on exit, and idThread2 the other way round.
    invoke CreateThread,0,4096,ADDR MoniThread,0,0,ADDR idThread1
    invoke CreateThread,0,4096,ADDR FireThread,0,0,ADDR idThread2
    mov hFThread,eax
    MsgLoop:
        invoke GetMessage,ADDR msg,0,0,0
        test eax,eax
        jz EndLoop                         ; WM_QUIT
        invoke TranslateMessage,ADDR msg
        invoke DispatchMessage,ADDR msg
        jmp MsgLoop
    EndLoop:
    ; Wait until BOTH threads have stored -1. AND yields -1 only when both
    ; values are -1 (OR would already do so with one). Sleep keeps the wait
    ; from spinning at full CPU load.
    @@:
        invoke Sleep,10
        mov eax,idThread1
        and eax,idThread2
        not eax                            ; 0 exactly when both are -1
        test eax,eax
        jnz @B
    invoke ExitProcess,eax                 ; eax is 0 here

end start


It is my hope that Rony B Chandran can see this update of his code that I uploaded here.  If anyone else can see any improvements, please let me know.  I am still learning this Assembly Language stuff.

LATER GUYS,

Jeff C

Jeff, I added the "code" tags so your code was readable.
:bdg

P.S. The Light_Flash still needs to be commented, but I hope this helps anyone new to ASM and/or graphics.
Any good programmer knows, every large and/or small job, is equally large, to the programmer!

Petroizki

The code stores data at negative offsets from esp (for example [esp-4]) in several places. There was some discussion about this in the other forum, and it may not be safe to store any data there. Some debuggers mess with the data at negative offsets from esp.

OceanJeff32

There was a lot of MMX code, that I just took out, and the program works flawlessly!

I'm still inching my way through and understanding more and more about Assembly Language along the way!  No program has grabbed my attention more than this one, as far as teaching me what I want to know about Graphics programming, Intel CPUs inner workings, and Windows 32ASM programming!

I'm going to fully optimize this program with SSE2 extensions for the particle data movement, but I'm going to have to re-structure the data.

Later,

Jeff C
:U
Any good programmer knows, every large and/or small job, is equally large, to the programmer!

OceanJeff32

Mostly, I was working on the BLUR_MMX2 procedure, and I was a bit curious about the loop control. After each iteration, the program adds 3 to the indices. Since I calculate 64 bits at a time, shouldn't I be able to change that to 8?

Nevertheless, this does not work, the program does not deliver the pretty melting fire simulation, the smoky fire blur, after I change it to anything other than 3.

Just curious if anyone else was interested in messing around with this code.

There are a lot more optimizations that I am hatching, and I'm also going to start from scratch to re-do the particle system.

Let me know guys/gals,

Jeff C
:U
Any good programmer knows, every large and/or small job, is equally large, to the programmer!

Phil

Thanks for the pointer to this program Jeff. It's truly awesome from my perspective. I dl'd from his site, assembled as a console app, and then spent a few minutes watching in amazement! What a show! I'll probably spend some time with the source and watching your version evolve here as well. I found the link to your message by doing a keyword search for 'melt'! I'm trying to re-create a very simple program to produce a melting desktop ... Looks like I might discover a few tips by learning how the smoking clouds are done around the bursts.

Good luck with your conversions and optimizations for SSE2! I'm still working on learning the basic CPU but it looks like I'll learn a bit about MMX and SSE if I study this code and do a little diggin'



Jeff

wow thats a cool program.  i am definitely going to look this code over to see how it all works.