News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Modified JWasm

Started by habran, March 01, 2012, 02:42:17 AM

Previous topic - Next topic

habran

Hi everyone,
While we are waiting for Japheth to come back, I have decided to make some changes to JWasm6.
I suggested these changes to Japheth a few months ago, but he resisted because of
the danger they pose to code that uses the stack (PUSH/POP)

In my opinion an assembler programmer must be able to take care of everything
and we can use local variables or registers instead of pushing and popping a stack

I have tested it on complex files and it works beautifully

here is how I have done it:


changes to JWasm v2.06e source
file: proc.c
line 413
#if AMD64_SUPPORT
    /* adjust start displacement for Win64 FRAME procs.
     * v2.06: the list may contain xmm registers, which have size 16!
     *
     * Walks info->regslist, totals the bytes taken by the listed registers
     * and uses that total as the value of displ.
     * (cnt and displ are declared outside this excerpt.)
     */
    if ( info->isframe ) {
        uint_16 *regs = info->regslist;
        int sizestd = 0; /* running total for non-XMM registers, 8 bytes each */
        int sizexmm = 0; /* running total for XMM registers, 16 bytes each */
        if ( regs )
            /* the first list element is the number of register entries that follow */
            for( cnt = *regs++; cnt; cnt--, regs++ )
                if ( GetValueSp( *regs ) & OP_XMM )
                    sizexmm += 16;
                else
                    sizestd += 8;
        displ = sizexmm + sizestd;
        /* when XMM registers are listed and the non-XMM total is not a multiple
         * of 16, add 8 bytes of padding (presumably to keep the XMM save area
         * 16-byte aligned -- confirm against the prologue generator).
         */
        if (( sizestd & 0xf ) && sizexmm) //line 428 here was a bug, this fixes it
            displ += 8;
    }
#endif

file: invoke.c
line 148
/*
 * ms64_fcstart - work out how many 8-byte argument slots an INVOKE needs.
 *
 * proc       : the procedure being invoked; only its vararg flag and name are read
 * numparams  : parameter count supplied by the caller; recomputed for VARARG procs
 * start      : index into tokenarray where the scan for VARARG arguments begins
 * tokenarray : tokens of the INVOKE line, terminated by a T_FINAL token
 * value      : out - receives the slot count (never less than 4, always even)
 *
 * In this modified version no "sub rsp" line is queued any more, so the
 * function only reports the slot count through *value.
 */
static void ms64_fcstart( struct dsym *proc, int numparams, int start, struct asm_tok tokenarray[], int *value )
{
    /* v2.04: VARARG didn't work - for VARARG procs count the commas up to T_FINAL */
    if ( proc->e.procinfo->is_vararg ) {
        int idx = start;

        numparams = 0;
        while ( tokenarray[idx].token != T_FINAL ) {
            if ( tokenarray[idx].token == T_COMMA )
                numparams++;
            idx++;
        }
    }
    DebugMsg1(("ms64_fcstart(%s, numparams=%u) vararg=%u\n", proc->sym.name, numparams, proc->e.procinfo->is_vararg ));

    /* at least four slots; an odd count is rounded up to the next even one */
    if ( numparams < 4 )
        *value = 4;
    else
        *value = numparams + ( numparams & 1 );

    //AddLineQueueX( " sub %r, %d", T_RSP, numparams * 8 ); //here we prevent invoke to alter the stack
}

line 170
/*
 * ms64_fcend - counterpart of ms64_fcstart().
 *
 * value is the slot count that ms64_fcstart() stored. The statement that
 * queued the matching "add rsp" line is disabled below, so in this modified
 * version the function emits nothing.
 */
static void ms64_fcend( struct dsym *proc, int numparams, int value )
{
    //AddLineQueueX( " add %r, %d", T_RSP, value * 8 ); //here we prevent invoke to alter the stack
}

;-------------------------------------------------------------------------------------
;here is a modified simple example from Japheth's site how to use the modified version of JWasm.exe
option casemap:none
option win64:1
option frame:auto

    .nolist
    .nocref
WIN32_LEAN_AND_MEAN EQU 1
_WIN64 EQU 1
    include windows.inc
    .list
    .cref
   
    includelib kernel32.lib
    includelib user32.lib

WinMain proto :HINSTANCE, :HINSTANCE, :LPSTR, :UINT
;m2m memDst, memSrc
;Memory-to-memory move of 64 bits without PUSH/POP.
;The value is carried through xmm4, so xmm4 is overwritten.
m2m MACRO memDst, memSrc
    movq xmm4, memSrc
    movq memDst, xmm4
ENDM
;a2m target, symbol
;Stores the address of <symbol> in the memory operand <target>.
;rax is used as scratch register and is overwritten.
a2m MACRO target, symbol
    lea rax, symbol
    mov target, rax
ENDM

.const

.data?
hInstance HINSTANCE ?               ;written in WinMainCRTStartup with the result of GetModuleHandleA
CommandLine LPSTR ?                 ;written in WinMainCRTStartup with the result of GetCommandLineA
.data
ClassName db "SimpleWinClass",0     ;zero-terminated window class name (wc.lpszClassName, CreateWindowExA)
AppName  db "Our First Window",0    ;zero-terminated string passed as third argument of CreateWindowExA
.code

;Program entry point (named on the END directive of this listing).
;Stores the module handle and the command line pointer in the globals
;hInstance and CommandLine, calls WinMain and passes its result to ExitProcess.
WinMainCRTStartup proc FRAME
local shadows[4]:QWORD  ;shadow space to accommodate all calls in this function min 4 QWORD
                        ;it has to be the last local var
    invoke GetModuleHandleA, NULL
    mov    hInstance, rax
    invoke GetCommandLineA
    mov    CommandLine, rax
    invoke WinMain, hInstance, NULL, CommandLine, SW_SHOWDEFAULT
    invoke ExitProcess, eax     ;eax = value returned by WinMain; no RET follows this call

WinMainCRTStartup endp
;-------------------------------------------------------------------------------------
;Registers the window class, creates and shows the main window and runs the
;message loop. Returns msg.wParam of the last retrieved message in rax.
;hPrevInst, CmdLine and CmdShow are not referenced in the body.
WinMain proc FRAME hInst:HINSTANCE, hPrevInst:HINSTANCE, CmdLine:LPSTR, CmdShow:UINT

local wc:WNDCLASSEXA
local msg:MSG
local hwnd:HWND
local shadows[12]:QWORD ;shadow space to accommodate all calls in this function
                        ;CreateWindowExA has 12 parameters
    mov   hInst, rcx                    ;first argument arrives in rcx; keep a copy in its parameter slot
    mov   wc.cbSize, SIZEOF WNDCLASSEXA
    mov   wc.style, CS_HREDRAW or CS_VREDRAW
    a2m   wc.lpfnWndProc, WndProc       ;a2m goes through rax, rcx is left untouched
    mov   wc.cbClsExtra, NULL
    mov   wc.cbWndExtra, NULL
    mov   wc.hInstance, rcx             ;rcx still holds the first argument here (no call made yet)
    mov   wc.hbrBackground, COLOR_WINDOW+1
    mov   wc.lpszMenuName, NULL
    a2m   wc.lpszClassName, ClassName
    invoke LoadIconA, NULL, IDI_APPLICATION
    mov   wc.hIcon, rax                 ;the same icon handle is used for both icon fields
    mov   wc.hIconSm, rax
    invoke LoadCursorA, NULL, IDC_ARROW
    mov   wc.hCursor,rax
    invoke RegisterClassExA, addr wc
    ;12 arguments: this is the call that determines the size of the shadows array above
    invoke CreateWindowExA, NULL, ADDR ClassName, ADDR AppName,\
           WS_OVERLAPPEDWINDOW, CW_USEDEFAULT,\
           CW_USEDEFAULT, CW_USEDEFAULT,CW_USEDEFAULT, NULL, NULL,\
           hInst, NULL
    mov   hwnd,rax
    invoke ShowWindow, hwnd, SW_SHOWNORMAL  ;uses the constant SW_SHOWNORMAL, not CmdShow
    invoke UpdateWindow, hwnd
    ;message loop: runs until GetMessageA returns 0
    ;NOTE(review): GetMessageA returns a 32-bit BOOL and is documented to return -1
    ;on error; the test below looks at all of rax and treats -1 like "continue" - confirm
    ;this is intended
    .while (1)
        invoke GetMessageA, ADDR msg, NULL, 0, 0
        .break .if (!rax)
        invoke TranslateMessage, ADDR msg
        invoke DispatchMessageA, ADDR msg
    .endw
    mov   rax, msg.wParam               ;return value
    ret
WinMain endp
;-------------------------------------------------------------------------------------
;Window procedure. The arguments are used straight from the registers they
;arrive in (rcx, edx, r8, r9).
;WM_DESTROY: posts the quit message and returns 0.
;Any other message: returns whatever DefWindowProcA returns.
WndProc proc FRAME hWnd:HWND, uMsg:UINT, wParam:WPARAM, lParam:LPARAM
local shadows[4]:QWORD   ;shadow space for the calls made here, minimum 4 QWORD
    .if (edx != WM_DESTROY)
        invoke DefWindowProcA, rcx, edx, r8, r9
    .else
        invoke PostQuitMessage, NULL
        xor eax, eax     ;a 32-bit write clears the upper half too, so rax = 0
    .endif
    ret
WndProc endp

end WinMainCRTStartup



regards

qWord

hi,

that's nice.
I've also asked Japheth to add an option that allows this. He said that the current implementation of INVOKE and/or PROC is not final - let's see what he will do when he comes back to JWasm.
(Automatic calculation and allocation of the required shadow space, based on the INVOKEs used, would also be a nice feature.)

qWord

FPU in a trice: SmplMath
It's that simple!

habran

thanks qWord,
you are right about automatic calculation
I have been studying how to implement it and,
if Japheth doesn't come back soon, I will do it myself
however, for now this version works fine for me and
it will be easy to remove the local shadows once automatic calculation is done
I wish that the source of version 2.07 was available, because some improvements have been made there
although there is still that error in the line 428 which calculates wrong stack alignment

regards


johnsa

From what I've seen trying to use jwasm so-far there are a few things that need to be done.
(Ive spoken to Japheth about them as well but he's seriously time constrained to get round to any of this).

Here is the state of things as I understand it:

1) The ABI says that at least 4 qwords should be reserved as shadow space for parameters. This doesn't make sense to me (unless I'm missing something, the actual ABI is wrong)...
parameters are passed in RCX,RDX,R8,R9 .. for integers/pointers... floats in XMM0-XMM3 ... so surely there should be more space reserved to handle shadowing float/double params?

2) We really need the ability to align LOCALS (automatically for qwords,dwords etc) but explicitly for structs so that we can say LOCAL mySIMDVar:_m128:ALIGN 16 (and the rsp is accounted for correctly).

3) JWASM needs to ensure that RSP is aligned 16 on start and stays that way through all procedures to allow 2 to work.

4) Invoke shouldn't touch the stack at all, invokes parameters should be summed up and the shadow space reserved accordingly in the parent proc.

5) I think that in some cases jwasm isn't inserting things into the stack space correctly to ensure alignment of qwords, dwords etc..

My main concern with all of the above while testing is that we should try to emulate the output of VC as closely as possible. At present I cannot use profilers or the Visual Studio debugger with 64-bit JWasm apps, as the locals and parameters aren't being picked up based on where VS expects them to be in the stack frame.

johnsa

In the posted code update above, maxparams isn't defined anywhere, I just made it static int maxparams = 0 ? in the same file...
Compiles fine with PellesC + Makefile.. however when i now try to assemble the source (as you provided) or any other I get this error:

test64.asm(51) : Error A2172: Initializer magnitude too large

and line 51 is:

WinMainCRTStartup endp

??

johnsa

Ok, I tried to re-compile the original (un-modded) source and that same error occurred. That's with Pelles C + make.
I switched to using the MSVC.Mak with VS2010, no problems at all.. compiles perfectly, and the jwasm exe assembles the provided source fine.

However.. I still see no locals,params at all in Visual Studio :(

habran

Hi johnsa,
you can not see them because JWasm doesn't support them yet
CodeView V8 symbolic debugging information for output format COFF is in the pipeline for the next release
for now you have to put them in registers for debugging purpose
and when everything works fine you can put them back in to variables

I am still working on automatic calculation and allocation of required shadow space

when I finish I will post it here

regards

donkey

Quote from: johnsa on March 01, 2012, 11:31:35 AM
1) The ABI says that at least 4 qwords should be reserved as shadow space for parameters. This doesn't make sense to me (unless I'm missing something, the actual ABI is wrong)...
parameters are passed in RCX,RDX,R8,R9 .. for integers/pointers... floats in XMM0-XMM3 ... so surely there should be more space reserved to handle shadowing float/double params?

Perhaps I've misunderstood the ABI then, as I read it only the 4 integer registers are copied to shadow space and that only 32 bytes must be reserved, with additional space used by pushing parameters onto the stack when the number of parameters exceeds 4. Floating point and doubles are not copied to shadow space at all and are accessed directly from the XMM registers, the corresponding integer register is ignored but its slot is still used (the register value is ignored). The diagram for x64 stack usage shows this quite clearly.

http://msdn.microsoft.com/en-us/library/ew5tede7.aspx



Edgar
"Ahhh, what an awful dream. Ones and zeroes everywhere...[shudder] and I thought I saw a two." -- Bender
"It was just a dream, Bender. There's no such thing as two". -- Fry
-- Futurama

Donkey's Stable

johnsa

Donkey, I agree with what you're saying the ABI does make it clear that ONLY the integers are shadowed, which means a reservation on the stack of 4*8... my point is that to me the actual abi is wrong..

imagine..

;Hypothetical example: multiply the first REAL4 argument by 10.0 with the FPU.
;The code addresses var1 as a memory operand; under the Win64 convention the
;first four REAL4 arguments arrive in xmm0-xmm3, which is the problem being
;illustrated by this example.
myProc proc var1:REAL4, var2:REAL4, var3:REAL4, var4:REAL4

fld dword ptr var1          ;load var1 from memory onto the FPU stack
fmul FP4(10.0)              ;FP4 is defined elsewhere - presumably yields a REAL4 constant, confirm
fstp dword ptr var1         ;store the product back to var1 and pop the FPU stack

ret
myProc endp

for example...
this is now totally cocked because of the abi,
fld dword ptr xmm0 isn't going to work very well :)

granted, knowing that its in xmm0 means you could code around it.. but to be honest i think the fastcall convention was a really stupid decision on MS's part.. it adds unnecessary complexity and confusion, and doesn't really offer any real speed benefit.. what would have made more sense is to use a modified stdcall that does the same smart stack allocation (ie: max called param count) and then used movs to put the values on stack instead of push/pop and procedures access all params from the stack as per stdcall.

As for JWasm not supporting CodeView V8 symbolic debug info... I've been getting some form of results thus-far:

c:\jwasm\jwasm -c -Zi  -Zd -win64 -Zp8 test64.asm
link /subsystem:windows /debug /pdb:test64.pdb /Libpath:"C:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\Lib\x64" /Libpath:"c:\masm32\lib" test64.obj

When viewed in visual studio 2010 debug mode gives me the attached scrn-shot. You'll see there are params and locals listed, their values are just bogus because VS can't determine the correct stack location for them.
Oddly however, when i assemble another file exactly the same way.. i then don't get any local/params at all... its a bit flaky.




johnsa

I see it says v8 support for next release on the jwasm site... I wonder if that means 2.07 or 2.08... if it's not in 2.07 I don't think I can wait much longer... :(
Really am going to land up with 2 options here, move to porting my app to C/C++ 64bit (yuck) ... or write my own assembler.. neither option really appeals to me..

johnsa

Looking at some more VC disasm.. it appears C shadows the floats etc. to the stack as well.. to ensure things like fld will still work..
This seems to be in contravention to the ABI but makes perfect sense..



void MyFunc2(float a)
{
000000013FEE1610  movss       dword ptr [rsp+8],xmm0 
000000013FEE1616  push        rdi 
a = a + 1.0f;
000000013FEE1617  movss       xmm0,dword ptr [a] 
000000013FEE161D  addss       xmm0,dword ptr [__real@3f800000 (13FEE78FCh)] 
000000013FEE1625  movss       dword ptr [a],xmm0 
}
000000013FEE162B  pop         rdi 
000000013FEE162C  ret 


donkey

That behaviour is specified in the ABI, C will shadow the floats because of its vararg type function calls, the float value must be available in both the XMM register and the integer register so it will always be shadowed if it is in the first 4 arguments.

http://msdn.microsoft.com/en-us/library/dd2wa36c%28v=vs.80%29.aspx
"Ahhh, what an awful dream. Ones and zeroes everywhere...[shudder] and I thought I saw a two." -- Bender
"It was just a dream, Bender. There's no such thing as two". -- Fry
-- Futurama

Donkey's Stable

habran

#12
I have successfully built what I promised (automatic calculation of shadow space)
now we don't need to use the local shadows
here are the compressed binaries



habran

#13
Warning for this build:
this is a build of the JWasm 2.06e source and it was built with PellesC

it can build up to 4096 functions
every function must have RET  or IRET at the end
so for example :

;Variant without a RET or IRET before ENDP.
WinMainCRTStartup proc FRAME
    invoke GetModuleHandleA, NULL
    mov    hInstance, rax
    invoke GetCommandLineA
    mov    CommandLine, rax
    invoke WinMain, hInstance, NULL, CommandLine, SW_SHOWDEFAULT
    invoke ExitProcess, eax     ;last statement of the procedure

WinMainCRTStartup endp


will not have shadow space

but
;Variant that ends with a RET before ENDP.
WinMainCRTStartup proc FRAME
    invoke GetModuleHandleA, NULL
    mov    hInstance, rax
    invoke GetCommandLineA
    mov    CommandLine, rax
    invoke WinMain, hInstance, NULL, CommandLine, SW_SHOWDEFAULT
    invoke ExitProcess, eax
    ret                         ;closing RET, placed after the ExitProcess call
WinMainCRTStartup endp

will have

if anybody is interested in source changes I will post it here
I hope you'll enjoy

regards


johnsa

Good work!

Still picking up a few things.

If I write identical asm and C code, the C version (under VS2010) subtracts 288 bytes from RSP, whereas the identical ASM (same params, same locals) does a sub RSP,224
So I'm not sure if it's missing some space for alignment or something..



;Test procedure for comparing the generated stack frame with the C version.
;Declares three locals (wc, msg, hwnd) and no explicit shadow-space array.
;Registers a window class, creates and shows a window, runs a message loop
;and returns msg.wParam in rax.
WinMainX proc FRAME hInst:HINSTANCE, hPrevInst:HINSTANCE, CmdLine:LPSTR, CmdShow:UINT

local wc:WNDCLASSEXA
local msg:MSG
local hwnd:HWND

    mov   hInst, rcx                    ;first argument arrives in rcx; keep a copy in its parameter slot
    mov   wc.cbSize, SIZEOF WNDCLASSEXA
    mov   wc.style, CS_HREDRAW or CS_VREDRAW
    a2m   wc.lpfnWndProc, WndProc
    mov   wc.cbClsExtra, NULL
    mov   wc.cbWndExtra, NULL
    mov   wc.hInstance, rcx             ;rcx still holds the first argument here (no call made yet)
    mov   wc.hbrBackground, COLOR_WINDOW+1
    mov   wc.lpszMenuName, NULL
    a2m   wc.lpszClassName, ClassName
    invoke LoadIconA, NULL, IDI_APPLICATION
    mov   wc.hIcon, rax                 ;the same icon handle is used for both icon fields
    mov   wc.hIconSm, rax
    invoke LoadCursorA, NULL, IDC_ARROW
    mov   wc.hCursor,rax
    invoke RegisterClassExA, addr wc
    ;the call with the most arguments in this procedure (12)
    invoke CreateWindowExA, NULL, ADDR ClassName, ADDR AppName,\
           WS_OVERLAPPEDWINDOW, CW_USEDEFAULT,\
           CW_USEDEFAULT, CW_USEDEFAULT,CW_USEDEFAULT, NULL, NULL,\
           hInst, NULL
    mov   hwnd,rax
    invoke ShowWindow, hwnd, SW_SHOWNORMAL
    invoke UpdateWindow, hwnd
    ;message loop: runs until GetMessageA returns 0 in rax
    .while (1)
        invoke GetMessageA, ADDR msg, NULL, 0, 0
        .break .if (!rax)
        invoke TranslateMessage, ADDR msg
        invoke DispatchMessageA, ADDR msg
    .endw
    mov   rax, msg.wParam               ;return value
    ret
WinMainX endp


and the C one..



/*
 * C counterpart used for the stack-frame comparison: fills a WNDCLASSEX,
 * registers it, creates and shows a window, then returns msg.wParam.
 * hInst, szWindowClass, szTitle and WndProc are defined outside this listing.
 * NOTE(review): this listing contains no message loop and never writes msg
 * before msg.wParam is read at the end - confirm whether the loop was cut
 * from the post.
 */
int APIENTRY _tWinMain(HINSTANCE hInstance,
                     HINSTANCE hPrevInstance,
                     LPTSTR    lpCmdLine,
                     int       nCmdShow)
{
UNREFERENCED_PARAMETER(hPrevInstance);
UNREFERENCED_PARAMETER(lpCmdLine);

// TODO: Place code here.
MSG msg;
HWND hWnd;
WNDCLASSEX wcex;

// describe the window class
wcex.cbSize = sizeof(WNDCLASSEX);

wcex.style = CS_HREDRAW | CS_VREDRAW;
wcex.lpfnWndProc = WndProc;
wcex.cbClsExtra = 0;
wcex.cbWndExtra = 0;
wcex.hInstance = hInstance;
wcex.hIcon = LoadIcon(hInstance, MAKEINTRESOURCE(IDI_TESTC64));
wcex.hCursor = LoadCursor(NULL, IDC_ARROW);
wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
wcex.lpszMenuName = MAKEINTRESOURCE(IDC_TESTC64);
wcex.lpszClassName = szWindowClass;
wcex.hIconSm = LoadIcon(wcex.hInstance, MAKEINTRESOURCE(IDI_SMALL));

RegisterClassEx(&wcex);

hInst = hInstance; // Store instance handle in our global variable
// 12 arguments - the call with the most arguments in this function
hWnd = CreateWindowEx(NULL,szWindowClass, szTitle, WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, 0, CW_USEDEFAULT, 0, NULL, NULL, hInstance, NULL);

   ShowWindow(hWnd, nCmdShow);
   UpdateWindow(hWnd);

// msg has not been assigned anywhere above (see the note in the header)
return (int) msg.wParam;
}


In fact switching the C code between CreateWindow and CreateWindowEx (which has an extra param) still leaves the RSP subtraction at 120h (288).
So it would appear that it rounds it up to some sort of multiple?

If this is working 100% it would be great to get it into Japheth's 2.07 release....
All we're missing then is an option to align a local struct and codeview/debugging to work...