News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

C++ Conversion to Assembly

Started by z941998, September 30, 2009, 11:10:01 PM

Previous topic - Next topic

z941998

Hi All,

I have a C++ source code that I need to convert to Assembly.
Does anyone have a tool that does this automatically or a reference to where I can get one?


qWord

compile it - that's all  :bg
Use a disassembler to get the asm-code
FPU in a trice: SmplMath
It's that simple!

raymond

QuoteUse a disassembler to get the asm-code

But then it may be extremely bloated.  :boohoo:

The experience of the original C++ programmer, the conditions under which it was compiled, and the compiler used, can all have a very significant effect on that bloating. I have seen some real horrors. :eek
When you assume something, you risk being wrong half the time
http://www.ray.masmcode.com

thomas_remkus

If it's very small code then using a disasm tool might be ok. But for anything of significance ... you can almost forget it because of all the C++ overhead. It's possible, but you might not like it.

z941998

Thanks everyone,

I will proceed onward.

szqh97


nathanpc


nathanpc

Try to do this:
> gcc -S test.cpp

Hope i'm helping! :U

Ficko

Using "Intel C++" you can directly generate 100% MASM 32 or 64 compileable assembler code.
It has a huge benefit too: if you have highly vectorizable code and compile it for a specific SIMD instruction set, you can get optimized SSE like you would never dream of in assembler. :bg

jj2007

Quote from: Ficko on October 12, 2009, 07:45:58 AM
you can get optimized SSE like you would never dream off in assembler. :bg

For these words, NightWare will eat you alive, Ficko :bg

Larry Hammick

The free Microsoft C++ Express Version will disassemble C into meaningful output, including named locals, ready or nearly ready to assemble with MASM. I once ported a 250K C program that way, removing some but not all of the overhead. But that's not the smartest of C compilers, and you'll see what I mean if you try it.  :toothy But it's a whole lot better than the disassembly by (e.g.) dumppe.exe, because you have the C source and the MS compiler gives you a line-by-line comparison between that source and what it assembles.

z941998

Thanks Larry, JJ, Ficko

I have been working on this issue offline.  I have obtained a C compiler and things are working as Larry pointed out; it's just at a slower pace than I anticipated.


KeepingRealBusy

Larry,

For what it's worth, here is what I do for conversions of C code, after the algorithm is cleaned up as much as possible:

--------------------------------------------------------------------------------

//  This was the original C function:

//  /* Search case-insensitive for a substring. */
//  WCHAR *stristrW(WCHAR *Haystack, WCHAR *Needle) {
//    WCHAR *p1;
//    size_t i;
//
//    if ((Haystack == NULL) || (Needle == NULL)) return(NULL);
//
//    p1 = Haystack;
//    i = wcslen(Needle);
//    while (*p1 != 0) {
//      if (_wcsnicmp(p1,Needle,i) == 0) return(p1);
//      p1++;
//      }
//
//    return(NULL);
//    }

--------------------------------------------------------------------------------

//    This is my C version after cleanup and forcing single entry, single exit:

//  This is the C code to be converted to asm:
//
//  Naked char *FindIStr(char *Haystack, char *Needle)
//  {
//      register  char *p1;
//      register  size_t i;
//      char      *RetVal = NULL;
// 
//      if1 ((Haystack == NULL) || (Needle == NULL)) /* Nothing */;
//      else1
//          for1 ((i = FindLength(Needle)), (p1 = Haystack);
//                ((p1 = FindIChar(p1, *Needle)) != NULL);
//                p1++)
//               if2 (CmpNIStr(p1, Needle, i) == 0)
//                   {
//                   RetVal = p1;
//                   break;
//                   }
// 
//      return RetVal;
//  }

--------------------------------------------------------------------------------

//  This is the combined C and converted asm code - the actual function definition.
//  Naked char *FindIStr(char *Haystack, char *Needle)
//  {
    __asm
    {
    ;  esi = Needle
    ;  edi = p1 = scan Haystack
    ;  ebx = i = FindLength(Needle)
    ;  ecx = RetVal
    ;  edx = d = *String
    #define FindIStrPushSize    (5 * (TYPE __int32))
    #define FindIStrIPOff       (FindIStrPushSize)
    #define FindIStrHaystackOff (FindIStrIPOff + (TYPE __int32))
    #define FindIStrNeedleOff   (FindIStrHaystackOff + (TYPE __int32))
    push   esi
    push   edi
    push   ebx
    push   ecx
    push   edx
//      register  char *p1;
//      register  size_t i;
//      char      *RetVal = NULL;
    xor    ecx,ecx
// 
//      if1 ((Haystack == NULL) || (Needle == NULL)) /* Nothing */;
;If1TestA:
    cmp    [esp + FindIStrHaystackOff],0
;   jz     If1Body
    jz     Else1Exit
;If1TestB:
    cmp    [esp + FindIStrNeedleOff],0
;   jnz    Else1Body
    jz     Else1Exit
;If1Body:
;   jmp    Else1Exit
;If1Exit:
//      else1
;Else1Body:
//          for1 ((i = FindLength(Needle)), (p1 = Haystack);
;For1InitA:
    mov    esi,[esp + FindIStrNeedleOff]
    mov    ebx,esi
    FindLength(ebx)
;For1InitB:
    mov    edi,[esp + FindIStrHaystackOff]
    jmp    For1Test
//                p1++) /* re=ordered the elements as C does */
For1Increment:
    inc    edi
//                ((p1 = FindIChar(p1, *Needle)) != NULL);
For1Test:
    movzx  eax,BYTE PTR [esi]
    push   eax
    push   edi
    call   FindIChar
    add    esp,8
    mov    edi,eax
    test   edi,edi
;   jnz    For1Body
;   jmp    For1Exit
    je     For1Exit
;For1Body:
//               if2 (CmpNIStr(p1, Needle, i) == 0)
;If2Test:
    push   ebx
    push   esi
    push   edi
    call   CmpNIStr
    add    esp,12
    cmp    eax,0
;   jz     If2Body
;   jmp    If2Exit
    jnz    If2Exit
//                   {
;If2Body:
//                   RetVal = p1;
    mov    ecx,edi
//                   break;
    jmp    For1exit
//                   }
If2Exit:
    jmp    For1Increment
For1Exit:
Else1Exit:
// 
//      return RetVal;
//  }
    mov    eax,ecx
    pop    edx
    pop    ecx
    pop    ebx
    pop    edi
    pop    esi
    ret    0
    }
    //  End of ASM code
}
//  End of C code

--------------------------------------------------------------------------------

//  This is the resultant .COD file code

PUBLIC _FindIStr
; Function compile flags: /Odtp
_TEXT SEGMENT
_Haystack$ = 8 ; size = 4
_Needle$ = 12 ; size = 4
_FindIStr PROC

; 2636 :
; 2637 :
; 2638 :
; 2639 :
; 2640 : //  This is the C code to be converted to asm
; 2641 : //  Naked char *FindIStr(char *Haystack, char *Needle)
; 2642 : //  {
; 2643 : //      register  char *p1;
; 2644 : //      register  size_t i;
; 2645 : //      char      *RetVal = NULL;
; 2646 : // 
; 2647 : //      if1 ((Haystack == NULL) || (Needle == NULL)) /* Nothing */;
; 2648 : //      else1
; 2649 : //          for1 ((i = FindLength(Needle)), (p1 = Haystack);
; 2650 : //                ((p1 = FindIChar(p1, *Needle)) != NULL);
; 2651 : //                p1++)
; 2652 : //               if2 (CmpNIStr(p1, Needle, i) == 0)
; 2653 : //                   {
; 2654 : //                   RetVal = p1;
; 2655 : //                   break;
; 2656 : //                   }
; 2657 : // 
; 2658 : //      return RetVal;
; 2659 : //  }
; 2660 :
; 2661 :
; 2662 :
; 2663 :
; 2664 : /*  This is the converted asm code
; 2665 :     __asm
; 2666 :     {
; 2667 :     ;  esi = Needle
; 2668 :     ;  edi = p1 = scan Haystack
; 2669 :     ;  ebx = i = FindLength(Needle)
; 2670 :     ;  ecx = RetVal
; 2671 :     ;  edx = d = *String
; 2672 :     #define FindIStrPushSize    (5 * (TYPE __int32))
; 2673 :     #define FindIStrIPOff       (FindIStrPushSize)
; 2674 :     #define FindIStrHaystackOff (FindIStrIPOff + (TYPE __int32))
; 2675 :     #define FindIStrNeedleOff   (FindIStrHaystackOff + (TYPE __int32))
; 2676 :     push   esi
; 2677 :     push   edi
; 2678 :     push   ebx
; 2679 :     push   ecx
; 2680 :     push   edx
; 2681 :     xor    ecx,ecx
; 2682 : ;If1TestA:
; 2683 :     cmp    [esp + FindIStrHaystackOff],0
; 2684 : ;   jz     If1Body
; 2685 :     jz     Else1Exit
; 2686 : ;If1TestB:
; 2687 :     cmp    [esp + FindIStrNeedleOff],0
; 2688 : ;   jnz    Else1Body
; 2689 :     jz     Else1Exit
; 2690 : ;If1Body:
; 2691 : ;   jmp    Else1Exit
; 2692 : ;If1Exit:
; 2693 : ;Else1Body:
; 2694 : ;For1InitA:
; 2695 :     mov    esi,[esp + FindIStrNeedleOff]
; 2696 :     mov    ebx,esi
; 2697 :     FindLength(ebx)
; 2698 : ;For1InitB:
; 2699 :     mov    edi,[esp + FindIStrHaystackOff]
; 2700 :     jmp    For1Test
; 2701 : For1Increment:
; 2702 :     inc    edi
; 2703 : For1Test:
; 2704 :     movzx  eax,BYTE PTR [esi]
; 2705 :     push   eax
; 2706 :     push   edi
; 2707 :     call   FindIChar
; 2708 :     add    esp,8
; 2709 :     mov    edi,eax
; 2710 :     test   edi,edi
; 2711 : ;   jnz    For1Body
; 2712 : ;   jmp    For1Exit
; 2713 :     je     For1Exit
; 2714 : ;For1Body:
; 2715 : ;If2Test:
; 2716 :     push   ebx
; 2717 :     push   esi
; 2718 :     push   edi
; 2719 :     call   CmpNIStr
; 2720 :     add    esp,12
; 2721 :     cmp    eax,0
; 2722 : ;   jz     If2Body
; 2723 : ;   jmp    If2Exit
; 2724 :     jnz    If2Exit
; 2725 : ;If2Body:
; 2726 :     mov    ecx,edi
; 2727 :     jmp    For1exit
; 2728 : If2Exit:
; 2729 :     jmp    For1Increment
; 2730 : For1Exit:
; 2731 : Else1Exit:
; 2732 :     mov    eax,ecx
; 2733 :     pop    edx
; 2734 :     pop    ecx
; 2735 :     pop    ebx
; 2736 :     pop    edi
; 2737 :     pop    esi
; 2738 :     ret    0
; 2739 :     }
; 2740 : }
; 2741 : */
; 2742 :
; 2743 :
; 2744 :
; 2745 :
; 2746 : //  This is the combined C and converted asm code - the actual function definition.
; 2747 : //  Naked char *FindIStr(char *Haystack, char *Needle)
; 2748 : //  {
; 2749 :     __asm
; 2750 :     {
; 2751 :     ;  esi = Needle
; 2752 :     ;  edi = p1 = scan Haystack
; 2753 :     ;  ebx = i = FindLength(Needle)
; 2754 :     ;  ecx = RetVal
; 2755 :     ;  edx = d = *String
; 2756 :     #define FindIStrPushSize    (5 * (TYPE __int32))
; 2757 :     #define FindIStrIPOff       (FindIStrPushSize)
; 2758 :     #define FindIStrHaystackOff (FindIStrIPOff + (TYPE __int32))
; 2759 :     #define FindIStrNeedleOff   (FindIStrHaystackOff + (TYPE __int32))
; 2760 :     push   esi

  024e0 56 push esi

; 2761 :     push   edi

  024e1 57 push edi

; 2762 :     push   ebx

  024e2 53 push ebx

; 2763 :     push   ecx

  024e3 51 push ecx

; 2764 :     push   edx

  024e4 52 push edx

; 2765 : //      register  char *p1;
; 2766 : //      register  size_t i;
; 2767 : //      char      *RetVal = NULL;
; 2768 :     xor    ecx,ecx

  024e5 33 c9 xor ecx, ecx

; 2769 : // 
; 2770 : //      if1 ((Haystack == NULL) || (Needle == NULL)) /* Nothing */;
; 2771 : ;If1TestA:
; 2772 :     cmp    [esp + FindIStrHaystackOff],0

  024e7 80 7c 24 18 00 cmp BYTE PTR [esp+24], 0

; 2773 : ;   jz     If1Body
; 2774 :     jz     Else1Exit

  024ec 74 54 je SHORT $Else1Exit$61281

; 2775 : ;If1TestB:
; 2776 :     cmp    [esp + FindIStrNeedleOff],0

  024ee 80 7c 24 1c 00 cmp BYTE PTR [esp+28], 0

; 2777 : ;   jnz    Else1Body
; 2778 :     jz     Else1Exit

  024f3 74 4d je SHORT $Else1Exit$61281

; 2779 : ;If1Body:
; 2780 : ;   jmp    Else1Exit
; 2781 : ;If1Exit:
; 2782 : //      else1
; 2783 : ;Else1Body:
; 2784 : //          for1 ((i = FindLength(Needle)), (p1 = Haystack);
; 2785 : ;For1InitA:
; 2786 :     mov    esi,[esp + FindIStrNeedleOff]

  024f5 8b 74 24 1c mov esi, DWORD PTR [esp+28]

; 2787 :     mov    ebx,esi

  024f9 8b de mov ebx, esi

; 2788 :     FindLength(ebx)

  024fb 53 push ebx
  024fc 8d 64 24 00 npad 4
  02500 80 3b 00 cmp BYTE PTR [ebx], 0
  02503 0f 84 03 00 00
00 je $+9
  02509 43 inc ebx
  0250a eb f4 jmp SHORT $+-10
  0250c 2b 1c 24 sub ebx, DWORD PTR [esp]
  0250f 83 c4 04 add esp, 4

; 2789 : ;For1InitB:
; 2790 :     mov    edi,[esp + FindIStrHaystackOff]

  02512 8b 7c 24 18 mov edi, DWORD PTR [esp+24]

; 2791 :     jmp    For1Test

  02516 eb 01 jmp SHORT $For1Test$61282
$For1Increment$61283:

; 2792 : //                p1++) /* re=ordered the elements as C does */
; 2793 : For1Increment:
; 2794 :     inc    edi

  02518 47 inc edi
$For1Test$61282:

; 2795 : //                ((p1 = FindIChar(p1, *Needle)) != NULL);
; 2796 : For1Test:
; 2797 :     movzx  eax,BYTE PTR [esi]

  02519 0f b6 06 movzx eax, BYTE PTR [esi]

; 2798 :     push   eax

  0251c 50 push eax

; 2799 :     push   edi

  0251d 57 push edi

; 2800 :     call   FindIChar

  0251e e8 00 00 00 00 call _FindIChar

; 2801 :     add    esp,8

  02523 83 c4 08 add esp, 8

; 2802 :     mov    edi,eax

  02526 8b f8 mov edi, eax

; 2803 :     test   edi,edi

  02528 85 ff test edi, edi

; 2804 : ;   jnz    For1Body
; 2805 : ;   jmp    For1Exit
; 2806 :     je     For1Exit

  0252a 74 16 je SHORT $For1Exit$61284

; 2807 : ;For1Body:
; 2808 : //               if2 (CmpNIStr(p1, Needle, i) == 0)
; 2809 : ;If2Test:
; 2810 :     push   ebx

  0252c 53 push ebx

; 2811 :     push   esi

  0252d 56 push esi

; 2812 :     push   edi

  0252e 57 push edi

; 2813 :     call   CmpNIStr

  0252f e8 00 00 00 00 call _CmpNIStr

; 2814 :     add    esp,12

  02534 83 c4 0c add esp, 12 ; 0000000cH

; 2815 :     cmp    eax,0

  02537 83 f8 00 cmp eax, 0

; 2816 : ;   jz     If2Body
; 2817 : ;   jmp    If2Exit
; 2818 :     jnz    If2Exit

  0253a 75 04 jne SHORT $If2Exit$61285

; 2819 : //                   {
; 2820 : ;If2Body:
; 2821 : //                   RetVal = p1;
; 2822 :     mov    ecx,edi

  0253c 8b cf mov ecx, edi

; 2823 : //                   break;
; 2824 :     jmp    For1exit

  0253e eb 02 jmp SHORT $For1exit$61286
$If2Exit$61285:

; 2825 : //                   }
; 2826 : If2Exit:
; 2827 :     jmp    For1Increment

  02540 eb d6 jmp SHORT $For1Increment$61283
$For1exit$61286:
$For1Exit$61284:
$Else1Exit$61281:

; 2828 : For1Exit:
; 2829 : Else1Exit:
; 2830 : // 
; 2831 : //      return RetVal;
; 2832 : //  }
; 2833 :     mov    eax,ecx

  02542 8b c1 mov eax, ecx

; 2834 :     pop    edx

  02544 5a pop edx

; 2835 :     pop    ecx

  02545 59 pop ecx

; 2836 :     pop    ebx

  02546 5b pop ebx

; 2837 :     pop    edi

  02547 5f pop edi

; 2838 :     pop    esi

  02548 5e pop esi

; 2839 :     ret    0

  02549 c3 ret 0
_FindIStr ENDP



Dave.

z941998

Dave, excellent technique.  It's been a while since I've seen this.  Question: How do you handle various topics, such as multi-dimensional arrays, or creating and using database indexes using various active nodes? Or do you just go C app by C app and try to find your learned-ed-ed code when needed for reference?

KeepingRealBusy

Z,

I have not really worked with C++ much, only C. Double indexing I try to do with table lookup (to avoid multiply) whenever possible. Have not worked with databases, but getting a .COD file from compiled  C++ code should show what indexing is necessary. This however requires that the database format doesn't change, otherwise a new compile and then reassembly is required - not too dynamic.

Note that in the C code conversion, I include the C code as commentary, and then comment out and change the ugly conditional jumps around a JMP to clean it up. The primary thing is to get the most heavily used variables into registers and keep them there. If you need to pass input parameters to subsequent functions, then you need to save the ESP at entry (i.e., in EBP) - otherwise the ESP-relative offsets of the parameters change as you push the call parameters.

Dave.