Print Page - PutPixel Function

Title: PutPixel Function
Post by: Farabi on March 22, 2008, 12:39:18 PM

Here is my putpixel function. Does anyone have an idea how to optimize it?



; PosCount - convert an (x,y) pixel coordinate into a linear pixel index.
;   x, y       : pixel coordinate
;   maxX, maxY : exclusive upper bounds; maxX is also the row stride in pixels
;   returns    : eax = y*maxX + x, or -1 if the coordinate is out of range
;   clobbers   : eax, ecx, flags
PosCount proc x:dword,y:dword,maxX:dword,maxY:dword

	mov eax,y
	cmp eax,maxY			; unsigned compare: rejects y >= maxY and, for
	jae brs					; bounds below 80000000h, negative y as well
	mov ecx,x
	cmp ecx,maxX			; same test for x
	jae brs
	test ecx,ecx			; signed guards: only reachable with a "negative"
	js brs					; coordinate when a bound is above 7FFFFFFFh
	test eax,eax
	js brs
		imul eax,maxX		; eax = y*maxX (works for any width, not only 1024)
		add eax,ecx			; eax = y*maxX + x
		ret
	brs:
	mov eax,-1				; out of range
	ret
PosCount endp

; PixelPut - store one pixel into the 1024x768 buffer pointed to by main_screen.
;   x, y     : pixel coordinate; nothing is written when PosCount rejects it
;   color    : bits 0-7 go to byte 2 of the pixel ("r"), bits 8-15 to byte 1
;              ("g") and bits 16-23 to byte 0 ("b"); byte 3 is left untouched
;   clobbers : eax, ecx, edx, flags
PixelPut proc x:dword,y:dword,color:dword

	invoke PosCount,x,y,1024,768	; eax = pixel index, or -1 if clipped
	.if eax != -1
		mov ecx,main_screen
		lea ecx,[ecx+eax*4]			; ecx -> 4-byte pixel cell
		mov edx,color
		mov [ecx+2],dl ;r
		mov [ecx+1],dh ;g
		shr edx,16
		mov [ecx+0],dl ;b
	.endif

	ret
PixelPut endp

Title: Re: PutPixel Function
Post by: daydreamer on March 22, 2008, 04:17:59 PM

I prefer to do it as a macro: putpixel is usually called in a loop, and most CPUs have little or no branch prediction for "call".
Here are my macros; one of them also blends, and one writes only a single color channel.
They work for hi-res floor mapping/sky mapping at 1280x1024x32bit under DirectDraw, which is why I fetch the screen address from ddsd.lpSurface; plotadot is just a temporary storage dword.
They are also designed so that you can simply change ecx and edx in a loop to plot a series of pixels.

Code Select

; plot - write the 32-bit value in eax to pixel (ecx,edx).
;   in       : ecx = screen x, edx = screen y, eax = ARGB value
;   clipping : drawn only when 0 <= x < 1280 and 0 <= y < 1024; the compares are
;              unsigned, so a negative coordinate fails the "<" test too
;   clobbers : ebx (holds the pixel address when a pixel is drawn), flags
;   NOTE(review): rows are taken to be 1280*4 bytes apart; ddsd.lPitch is not
;   consulted - confirm this matches the locked surface.
plot MACRO
        .IF ecx<1280 && edx<1024
        lea ebx,[edx+edx*4]             ; ebx = y*5
        shl ebx,8                       ; ebx = y*1280
        add ebx,ecx                     ; ebx = y*1280 + x
        shl ebx,2                       ; 4 bytes per pixel
        add ebx,[ddsd.lpSurface]

        mov [ebx],eax
        .ENDIF
        ENDM
        plot2 MACRO
        .IF ecx<1280
            .IF ecx>0
                .IF edx<1024
                    .IF edx>0
        mov ebx,edx
        sal ebx,6
        mov [plotadot],ebx
        sal ebx,2
        add ebx,[plotadot]
        sal ebx,2 ;1280
        add ebx,ecx
        sal ebx,2
        add ebx,[ddsd.lpSurface]
        add eax,[ebx]
        mov [ebx],eax
            .ENDIF
            .ENDIF
            .ENDIF
            .ENDIF
        ENDM

; plotb - write only the low byte (al) into byte 0 of pixel (ecx,edx).
;   in       : ecx = screen x, edx = screen y, al = byte to store
;   clipping : drawn only when 0 <= x < 1280 and 0 <= y < 1024 (unsigned
;              compares, so negative coordinates are rejected as well)
;   clobbers : ebx (holds the pixel address when a pixel is drawn), flags
plotb MACRO
        .IF ecx<1280 && edx<1024
        lea ebx,[edx+edx*4]             ; ebx = y*5
        shl ebx,8                       ; ebx = y*1280
        add ebx,ecx                     ; ebx = y*1280 + x
        shl ebx,2                       ; 4 bytes per pixel
        add ebx,[ddsd.lpSurface]
        ;add ebx,2                      ; (disabled) would select byte 2 instead
        mov [ebx],al
        .ENDIF
        ENDM


; plot3 - divide both coordinates by ebx (signed), then plot the result.
;   in       : ecx = x, edx = y, ebx = divisor, eax = ARGB value
;   out      : ecx = x/ebx, edx = y/ebx; the same values are stored in X1, Y1
;   clobbers : ecx, edx, flags, plus whatever the plot macro clobbers
;   ebx must be non-zero: idiv faults on a zero divisor, and also on
;   80000000h / -1.
plot3   MACRO
        push eax                ; keep the colour across the divisions
        mov eax,edx
        ;sub eax,512
        cdq                     ; sign-extend eax into edx:eax for idiv
        idiv ebx
        ;add eax,512
        mov Y1,eax
        mov eax,ecx
        ;sub eax,640
        cdq
        idiv ebx
        ;add eax,640
        mov X1,eax
        pop eax
        mov ecx,X1
        mov edx,Y1
        plot
        ENDM

Title: Re: PutPixel Function
Post by: u on March 22, 2008, 08:51:21 PM

daydreamer :| use the unsigned hack to merge the [0;1280) comparison.

Here's my version, that takes care of structured clipping:

Code Select



; sdSetPixel - store dwColor at (x,y) after applying the draw offset and
; clipping against the current bounds.
;   x, y    : coordinate; SDDrawOffs.x / SDDrawOffs.y are added first
;   dwColor : 32-bit value written to the target
;   The pixel is written only when left <= x < right and top <= y < bottom
;   (signed compares against SDBound).
;   All registers are preserved (eax, ecx, edx are saved by "uses").
sdSetPixel proc PUBLIC uses eax ecx edx x,y,dwColor
	mov ecx,x
	mov edx,y
	add ecx,SDDrawOffs.x
	add edx,SDDrawOffs.y

	cmp ecx,SDBound.left
	jl _ret
	cmp ecx,SDBound.right
	jge _ret
	cmp edx,SDBound.top
	jl _ret
	cmp edx,SDBound.bottom
	jge _ret
	imul edx,sTarget_Data.wid	; edx = y * target width
	mov eax,sTarget_Data.bits	; eax = base address of the pixel data
	add ecx,edx					; ecx = y*wid + x
	mov edx,dwColor
	mov [eax+ecx*4],edx			; 4 bytes per pixel

_ret:	ret
sdSetPixel endp

Certainly there's some room for improvement.

Title: Re: PutPixel Function
Post by: NightWare on March 23, 2008, 01:13:48 AM

"unsigned hack" also for farabi

Code Select

; PosCount - convert an (x,y) pixel coordinate into a linear pixel index.
;   x, y       : pixel coordinate
;   maxX, maxY : exclusive upper bounds; maxX is also the row stride in pixels
;   returns    : eax = y*maxX + x, or -1 if the coordinate is out of range
;   clobbers   : eax, ecx, flags
PosCount proc x:dword,y:dword,maxX:dword,maxY:dword

	mov ecx,x
	mov eax,y
	cmp ecx,maxX		; unsigned compare: a negative x looks like a huge
	jae brs				; value, so one test covers both ends of the range
	cmp eax,maxY
	jae brs

		imul eax,maxX	; eax = y*maxX (works for any width, not only 1024)
		add eax,ecx		; eax = y*maxX + x
		ret
brs:
	mov eax,-1
	ret
PosCount endp

; PixelPut - store a 32-bit colour at (x,y) in the 1024x768 buffer pointed to
; by main_screen. Nothing is written when PosCount returns a negative index
; (its -1 "out of range" result).
;   clobbers : eax, ecx, edx, flags
PixelPut proc x:dword,y:dword,color:dword

	invoke PosCount,x,y,1024,768	; eax = pixel index, or -1
	test eax,eax
	js skip							; sign bit set -> clipped, draw nothing

	mov ecx,main_screen
	mov edx,color
	mov [ecx+eax*4],edx				; 4 bytes per pixel

skip:
	ret
PixelPut endp

Title: Re: PutPixel Function
Post by: daydreamer on March 23, 2008, 07:57:50 AM

Quote from: Ultrano on March 22, 2008, 08:51:21 PM
daydreamer :| use the unsigned hack to merge the [0;1280) comparison.

Here's my version, that takes care of structured clipping:
Code Select Expand
sdSetPixel proc PUBLIC uses eax ecx edx x,y,dwColor
	mov ecx,x
	mov edx,y
	add ecx,SDDrawOffs.x
	add edx,SDDrawOffs.y
	xor eax,eax

	; clip against SDBound (signed compares)
	cmp ecx,SDBound.left
	jl _ret
	cmp ecx,SDBound.right
	jge _ret
	cmp edx,SDBound.top
	jl _ret
	cmp edx,SDBound.bottom
	jge _ret

	; address = bits + (y*wid + x)*4
	imul edx,sTarget_Data.wid
	mov eax,sTarget_Data.bits
	add ecx,edx
	mov edx,dwColor
	mov [eax+ecx*4],edx

_ret:	ret
sdSetPixel endp

Certainly there's some room for improvement.

Clipping should be handled with MMX/SSE2: PAND the results of all the comparisons together and do a single final conditional branch. You could even compare against a list of rectangles this way, and lots of rectangles can shape anything. I wonder whether you should also unroll the pixel address calculation with pmuld instead of imul?
I think Farabi's way of keeping PosCount and PixelPut in separate procs has more potential: make one initial call to PosCount, and put code there to set up this kind of macro:

Code Select

; plotdelta - store eax at the address held in pixeladress, then advance
; pixeladress by pixeldelta bytes.
;   in       : eax = value to store
;   clobbers : ebx (left holding the advanced address), flags
;   No clipping is done here.
plotdelta MACRO
mov ebx,pixeladress     ; ebx = current pixel address
mov [ebx],eax           ; write the pixel
add ebx,pixeldelta      ; step to the next pixel
mov pixeladress,ebx     ; remember it for the next plotdelta
ENDM

where pixeldelta is -4 or +4, combined with lPitch or -lPitch (DirectDraw's way of telling how many bytes there are between scanlines).
Can anyone come up with a fast way to do the shifts for 1680-wide screens? I am clueless.

Title: Re: PutPixel Function
Post by: u on March 23, 2008, 08:15:15 AM

About 1680,
it requires mixing the results of 4 shifts in all cases, it's simply inconvenient. An imul will be better (3 cycles).

Title: Re: PutPixel Function
Post by: johnsa on March 25, 2008, 09:32:15 AM

What about using a lookup table? Then you can avoid the shifts or the imul, especially for 1680.

; Row lookup: fetch a per-row value from a table instead of multiplying.
;ebx = y
;edx = x
mov edi,lookupTable[ebx*4]	; edi = table entry for row y (4-byte entries)
add edi,edx					; add x to it
; NOTE(review): x is added unscaled, so with 4-byte pixels edx presumably has
; to be a byte offset (x*4) already - confirm against the caller.

That takes care of both your DDraw surface pitch and the *Y calculation beforehand.

Title: Re: PutPixel Function
Post by: u on March 25, 2008, 07:39:29 PM

the bad side of a LUT is that it's L2-cache stalled or *gasp* RAM-stalled. Meanwhile an imul always takes 3 cycles or less.
So, in some designs a LUT is better (randomly plotting points around), but in most cases it's not recommended.
Calling a proc to compute the address - blah! Don't let the C++ compiler beat us so easily, please! Just make several procs like DDraw::LockRect. Or macros, even better.

Title: Re: PutPixel Function
Post by: daydreamer on March 25, 2008, 08:57:58 PM

Quote from: Ultrano on March 25, 2008, 07:39:29 PM
the bad side of a LUT is that it's L2-cache stalled or *gasp* RAM-stalled. Meanwhile an imul always takes 3 cycles or less.
So, in some designs a LUT is better (randomly plotting points around), but in most cases it's not recommended.
Calling a proc to compute the address - blah! Don't let the C++ compiler beat us so easily, please! Just make several procs like DDraw::LockRect. Or macros, even better.

I avoid LUTs except for holding the results of slow fsin's. A LUT is especially uninteresting when you need to plot tons of pixels: you probably make heavy use of the cache for textures anyway and don't want to read in LUTs that eat bandwidth.
I have some SSE code that makes use of an fsincos LUT, but the parallel 3D rotation code makes heavy use of only 4 entries in the LUT.
And now that I use MMX anyway for saturation, integer packed muls take even less time, but isn't there a penalty for bringing the result over to the general registers?
Otherwise, unrolling a pixel plotter to plot several pixels at once with different Y's could be an option to speed things up.
For a sprite renderer, simply:
ADD ebx,lPitchminusspritewidth

The MASM Forum Archive 2004 to 2012

General Forums => The Laboratory => Topic started by: Farabi on March 22, 2008, 12:39:18 PM