News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Demo with the FASTCALL convention

Started by Vortex, April 26, 2008, 10:08:53 AM

Previous topic - Next topic

Vortex

Poasm supports the FASTCALL calling convention. Here is a quick demo.

Quote:
Argument-passing order
The first two DWORD or smaller arguments are passed in ECX and EDX registers.
All other arguments are passed right to left.

Stack-maintenance responsibility
Called function pops the arguments from the stack.

Name-decoration convention
At sign (@) is prefixed to names; an at sign followed by the number of bytes (in decimal)
in the parameter list is suffixed to names.

Case-translation convention
No case translation performed.

http://msdn2.microsoft.com/en-us/library/6xa169sk(VS.71).aspx


.386
.model flat,stdcall
option casemap:none

include     fcalldemo.inc

.data
; Format string handed to wsprintf; %d receives the value returned by fcallproc.
string1     db '10 + 20 + 30 + 40 = %d',0

.data?
; Uninitialized 100-byte buffer that wsprintf writes into and StdOut prints.
buffer      db 100 dup(?)

.code

; fcallproc - returns a + b + c + d in EAX.
; FASTCALL: the first two DWORD arguments (a, b) arrive in ECX and EDX;
; c and d are passed on the stack and are removed by this procedure on
; return (the assembler turns the plain "ret" below into "ret 8").
fcallproc PROC FASTCALL a:DWORD,b:DWORD,c:DWORD,d:DWORD

    mov     eax,a               ; a is the ECX register argument
    add     eax,b               ; b is the EDX register argument
    add     eax,c               ; first stack argument
    add     eax,d               ; second stack argument
    ret

fcallproc ENDP

; Entry point: call the FASTCALL procedure, format its result into buffer
; using string1 as the format string, pass the buffer to StdOut, then exit.
start:

    invoke  fcallproc,10,20,30,40           ; sum comes back in EAX
    invoke  wsprintf,ADDR buffer,ADDR string1,eax
    invoke  StdOut,ADDR buffer
    invoke  ExitProcess,0
   
END start


Disassembling the object module with Agner Fog's objconv tool :

; Decorated FASTCALL name: '@' prefix, '@16' suffix = 16 bytes of parameters.
@fcallproc@16 PROC NEAR
          push    ebp                           ; frame needed to reach the stack arguments
          mov     ebp, esp
          mov     eax, ecx                      ; a
          add     eax, edx                      ; b
          add     eax, dword ptr [ebp + 08H]    ; c (pushed last by the caller)
          add     eax, dword ptr [ebp + 0CH]    ; d (pushed first by the caller)
          leave
          ret     8                             ; callee pops the two stack arguments
@fcallproc@16 ENDP

; Caller side of the demo as emitted by "invoke".
_start  PROC NEAR
          push    40                            ; d - stack arguments go right to left
          push    30                            ; c
          mov     edx, 20                       ; b - second argument in EDX
          mov     ecx, 10                       ; a - first argument in ECX
          call    @fcallproc@16                 ; no stack cleanup here: the callee did "ret 8"
          push    eax                           ; sum, consumed by %d
          push    offset string1
          push    offset buffer
          call    _wsprintfA
          add     esp, 12                       ; caller removes the three wsprintfA arguments
          push    offset buffer
          call    _StdOut@4
          push    0
          call    _ExitProcess@4

[attachment deleted by admin]

jj2007

          push    40
          push    30
          mov     edx, 20
          mov     ecx, 10


Interesting, but I wonder whether it's really faster - see the stack vs mem thread. Sizewise, it should be almost the same - mov edx, 20 needs more space than the push...

Vortex

Hi jj2007,

I saw that thread. I created this simple example because it would be interesting to use FASTCALL with procedures taking at most two parameters:

.386
.model flat, stdcall
option casemap:none

.code

; testproc - returns a + b in EAX.
; FASTCALL with two DWORD parameters: a arrives in ECX and b in EDX,
; so no argument is read from the stack.
testproc PROC FASTCALL a:DWORD,b:DWORD

    mov eax,a                   ; a is the ECX register argument
    add eax,b                   ; b is the EDX register argument
    ret

testproc ENDP

; Entry point of the two-parameter demo: calls testproc(1, 2) and returns
; with the result still in EAX (nothing is printed).
start:

    invoke  testproc,1,2
    ret

END start



.386
option dotname
.model flat

public @testproc@8
public _start


_text   SEGMENT PARA PUBLIC 'CODE'

; Both parameters live in registers: no stack frame and a plain "ret".
@testproc@8 PROC NEAR
        mov     eax, ecx                        ; a
        add     eax, edx                        ; b
        ret
@testproc@8 ENDP

; "invoke testproc,1,2" expands to two register loads and a call; no pushes.
_start  PROC NEAR
        mov     edx, 2                          ; b - second argument
        mov     ecx, 1                          ; a - first argument
        call    @testproc@8
        ret
_start  ENDP
_text   ENDS

Synfire

Quote from: jj2007 on April 26, 2008, 10:31:02 AM
Interesting, but I wonder whether it's really faster - see the stack vs mem thread. Sizewise, it should be almost the same - mov edx, 20 needs more space than the push...

Moving data from registers will always be faster than moving from memory. That's the point of registers. FASTCALL is really handy, I tend to use it a lot (probably more so because I'm a NASM user and when I have only two arguments it's easier just to pass them through registers like Vortex did)...

jj2007

Just for fun:
; fcallproc - two-parameter FASTCALL variant; returns a + b in EAX.
fcallproc PROC FASTCALL a:DWORD,b:DWORD
    mov     eax,a               ; a is the ECX register argument
    add     eax,b               ; b is the EDX register argument
    ret
fcallproc ENDP

; SlowCall - same computation written without declared parameters:
; the caller must load ECX and EDX by hand before the call.
; Returns ECX + EDX in EAX.
SlowCall PROC
    mov     eax,ecx
    add     eax,edx
    ret
SlowCall ENDP

; Entry point (fragment): runs the FASTCALL version through "invoke", prints
; the result, then performs the same call with hand-loaded registers.
start:
    int 3   ; greetings to Olly
    invoke  fcallproc,10,20                 ; assembler loads ECX/EDX itself
    invoke  wsprintf,ADDR buffer,ADDR string1,eax
    invoke  StdOut,ADDR buffer
    int 3                                   ; second breakpoint, before the manual call
    mov ecx, 10                             ; first operand for SlowCall
    mov edx, 20                             ; second operand for SlowCall
    call SlowCall


... translates into:

00401000  /$ 89C8            MOV EAX,ECX
00401002  |. 01D0            ADD EAX,EDX
00401004  \. C3              RETN
00401005  /$ 89C8            MOV EAX,ECX
00401007  |. 01D0            ADD EAX,EDX
00401009  \. C3              RETN
0040100A >/$ CD 03           INT 3
0040100C  |. BA 14000000     MOV EDX,14
00401011  |. B9 0A000000     MOV ECX,0A
00401016  |. E8 E5FFFFFF     CALL fcalldem.00401000
0040101B  |. 50              PUSH EAX                                 ; /<%d>
0040101C  |. 68 00204000     PUSH fcalldem.00402000                   ; |Format = "
fc: 10 + 20 = %d"
00401021  |. 68 E4204000     PUSH fcalldem.004020E4                   ; |s = fcalldem.004020E4
00401026  |. E8 49000000     CALL <JMP.&user32.wsprintfA>             ; \wsprintfA
0040102B  |. 83C4 0C         ADD ESP,0C
0040102E  |. 68 E4204000     PUSH fcalldem.004020E4                   ; /Arg1 = 004020E4 ASCII "
fc: 10 + 20 = 30"
00401033  |. E8 44000000     CALL fcalldem.0040107C                   ; \fcalldem.0040107C
00401038  |. CD 03           INT 3
0040103A  |. B9 0A000000     MOV ECX,0A
0040103F  |. BA 14000000     MOV EDX,14
00401044  |. E8 BCFFFFFF     CALL fcalldem.00401005


The only difference is that with the "SlowCall" you see exactly what you are doing... eschew obfuscation ;-)

MichaelW

I adapted my second set of timing macros and modified the code so the cycle counts can be compared between STDCALL and FASTCALL, with 4 and 2 parameters. Results on my P3:

16 cycles, STDCALL
13 cycles, FASTCALL
12 cycles, STDCALL
6 cycles, FASTCALL

[attachment deleted by admin]
eschew obfuscation

Vortex

Here is another FASTCALL function coded with Poasm :

.386
.model flat
option casemap:none

.code

; CopyString - copies the zero-terminated string at src to dest,
; including the terminating zero byte.
; In:    ECX = dest, EDX = src (FASTCALL register arguments)
; Out:   EAX = index of the terminator, i.e. the number of bytes copied
;        not counting the zero byte
; Saves: EBX (USES ebx emits the push/pop)
; Note:  no length limit is applied; dest must be large enough for src.
CopyString PROC FASTCALL USES ebx dest:DWORD,src:DWORD

; ecx -> destination
; edx -> source

    mov     eax,-1                      ; index; becomes 0 on the first pass
@@:
    add     eax,1
    movzx   ebx,BYTE PTR [edx+eax]      ; load source byte, zero-extended
    mov     BYTE PTR [ecx+eax],bl       ; store it before testing, so the zero is copied too
    test    ebx,ebx
    jnz     @b                          ; loop until the zero byte has been written
    ret

CopyString  ENDP

END


Disassembling the object module :


.386
.model flat

public @CopyString@8


_text   SEGMENT PARA PUBLIC 'CODE'

; Object-module view of CopyString: ECX = destination, EDX = source.
@CopyString@8 PROC NEAR
        push    ebx                             ; emitted for "USES ebx"
        mov     eax, -1
?_001:  add     eax, 1
        movzx   ebx, byte ptr [edx + eax]
        mov     byte ptr [ecx + eax], bl
        test    ebx, ebx
        jnz     ?_001
        pop     ebx                             ; restore EBX before returning
        ret                                     ; plain ret: no stack arguments to pop
@CopyString@8 ENDP
_text   ENDS

END

[attachment deleted by admin]

Vortex

Calling the same FASTCALL function from Masm :

include     Test.inc

; Declare the external routine under its decorated FASTCALL name.
@CopyString@8 PROTO SYSCALL         ; a trick to avoid the leading underscore
                                    ; in the object module

; Text alias so the rest of the source can use the undecorated name.
CopyString EQU <@CopyString@8>

; fcallX - call a FASTCALL function taking at most two parameters.
;   func   : routine to call (required)
;   param1 : optional first argument, loaded into ECX
;   param2 : optional second argument, loaded into EDX
; Clobbers ECX and/or EDX (whichever are loaded) plus whatever func changes.
;
; EDX is normally loaded before ECX. That order would destroy param1 when
; param1 is the EDX register itself, so that case is handled separately:
; ECX is loaded first, or the two registers are exchanged when the caller
; asks for a full swap (param1 = edx, param2 = ecx).
; Limitation: memory operands addressed through ECX or EDX (e.g. [edx+4])
; are not detected; load such values into other registers first.
fcallX MACRO func:REQ,param1,param2
    IFIDNI  <param1>,<edx>
        IFIDNI  <param2>,<ecx>
            xchg    ecx,edx             ; both arguments are in the opposite register
        ELSE
            mov     ecx,edx             ; read param1 before EDX is overwritten
            IFNB    <param2>
                mov     edx,param2
            ENDIF
        ENDIF
    ELSE
        IFNB    <param2>
            mov     edx,param2
        ENDIF

        IFNB    <param1>
            mov     ecx,param1
        ENDIF
    ENDIF

    call    func

ENDM

.data
; Source string copied by CopyString.
src     db 'This is a test string.',0
; printf format; %s receives the destination buffer, followed by CR/LF.
message db 'Destination buffer = %s',13,10,0

.data?
; Uninitialized 100-byte destination buffer for CopyString.
dest    db 100 dup(?)

.code

; Entry point: copy src into dest through the FASTCALL routine, print the
; destination buffer with crt_printf, then exit.
start:

    fcallX  CopyString,OFFSET dest,OFFSET src   ; ECX = dest, EDX = src
    invoke  crt_printf,ADDR message,ADDR dest
    invoke  ExitProcess,0

END start

[attachment deleted by admin]