Hi All,
I have a C++ source code that I need to convert to Assembly.
Does anyone have a tool that does this automatically or a reference to where I can get one?
compile it - that's all :bg
Use a disassembler to get the asm-code
QuoteUse a disassembler to get the asm-code
But then it may be extremely bloated. :boohoo:
The experience of the original C++ programmer, the conditions under which it was compiled, and the compiler used, can all have a very significant effect on that bloating. I have seen some real horrors. :eek
If it's very small code then using a disasm tool might be ok. But for anything of significance ... you can almost forget it because of all the C++ overhead. It's possible, but you might not like it.
Thanks everyone,
I will proceed onward.
compile it - then disassemble it
Quote from: szqh97 on October 07, 2009, 02:10:30 PM
compile it - then disassemble it
Same answer for the second time!
Try to do this:
> gcc -S test.cpp
Hope I'm helping! :U
Using "Intel C++" you can directly generate 100% MASM 32 or 64 compileable assembler code.
It has a huge benefit too: if your code is highly vectorizable and you compile it for a specific SIMD instruction set, you can get optimized SSE like you would never dream of in assembler. :bg
Quote from: Ficko on October 12, 2009, 07:45:58 AM
you can get optimized SSE like you would never dream off in assembler. :bg
For these words, NightWare will eat you alive, Ficko :bg
The free Microsoft C++ Express Version will disassemble C into meaningful output, including named locals, ready or nearly ready to assemble with MASM. I once ported a 250K C program that way, removing some but not all of the overhead. But that's not the smartest of C compilers, and you'll see what I mean if you try it. :toothy But it's a whole lot better than the disassembly by (e.g.) dumppe.exe, because you have the C source and the MS compiler gives you a line-by-line comparison between that source and what it assembles.
Thanks, Larry, JJ, Ficko
I have been working on my issue offline. I have obtained a C compiler and things are working as Larry pointed out; it's just at a slower pace than I anticipated.
Larry,
For what it's worth, here is what I do for conversions of C code, after the algorithm is cleaned up as much as possible:
--------------------------------------------------------------------------------
// This was the original C function:
// /* Search case-insensitive for a substring. */
// WCHAR *stristrW(WCHAR *Haystack, WCHAR *Needle) {
// WCHAR *p1;
// size_t i;
//
// if ((Haystack == NULL) || (Needle == NULL)) return(NULL);
//
// p1 = Haystack;
// i = wcslen(Needle);
// while (*p1 != 0) {
// if (_wcsnicmp(p1,Needle,i) == 0) return(p1);
// p1++;
// }
//
// return(NULL);
// }
--------------------------------------------------------------------------------
// This is my C version after cleanup and forcing single entry, single exit:
// This is the C code to be converted to asm:
//
// Naked char *FindIStr(char *Haystack, char *Needle)
// {
// register char *p1;
// register size_t i;
// char *RetVal = NULL;
//
// if1 ((Haystack == NULL) || (Needle == NULL)) /* Nothing */;
// else1
// for1 ((i = FindLength(Needle)), (p1 = Haystack);
// ((p1 = FindIChar(p1, *Needle)) != NULL);
// p1++)
// if2 (CmpNIStr(p1, Needle, i) == 0)
// {
// RetVal = p1;
// break;
// }
//
// return RetVal;
// }
--------------------------------------------------------------------------------
// This is the combined C and converted asm code - the actual function definition.
// Naked char *FindIStr(char *Haystack, char *Needle)
// {
__asm
{
    // FindIStr: search Haystack for Needle (case-insensitive, per the
    // original C function this was converted from).
    //
    // In:   [esp + 4] = Haystack, [esp + 8] = Needle at entry (stack args,
    //       caller cleans up: the function ends with ret 0).
    // Out:  eax = pointer to the first match inside Haystack, or NULL when
    //       either argument is NULL or no match is found.
    // Regs: esi, edi, ebx, ecx and edx are saved on entry and restored on
    //       exit; only eax is changed for the caller.
    //
    // This is a naked function (no compiler prologue), so the arguments are
    // addressed relative to esp AFTER the five pushes below. esp must be back
    // at that level whenever one of the FindIStr...Off offsets is used.
    //
    // Register roles:
    ; esi = Needle
    ; edi = p1 = scan position in Haystack
    ; ebx = i = FindLength(Needle)
    ; ecx = RetVal
    ; edx = saved and restored only (not used in the body)
#define FindIStrPushSize (5 * (TYPE __int32))
#define FindIStrIPOff (FindIStrPushSize)
#define FindIStrHaystackOff (FindIStrIPOff + (TYPE __int32))
#define FindIStrNeedleOff (FindIStrHaystackOff + (TYPE __int32))
    push esi
    push edi
    push ebx
    push ecx
    push edx

    // register char *p1;
    // register size_t i;
    // char *RetVal = NULL;
    xor ecx,ecx

    // if1 ((Haystack == NULL) || (Needle == NULL)) /* Nothing */;
    // The DWORD PTR size is required here. With no size given, the inline
    // assembler emits a BYTE PTR compare, which tests only the low byte of
    // the pointer and rejects any valid pointer whose low byte is zero.
    cmp DWORD PTR [esp + FindIStrHaystackOff],0
    jz Else1Exit
    cmp DWORD PTR [esp + FindIStrNeedleOff],0
    jz Else1Exit

    // else1
    // for1 ((i = FindLength(Needle)), (p1 = Haystack);
    mov esi,[esp + FindIStrNeedleOff]
    mov ebx,esi
    FindLength(ebx)
    mov edi,[esp + FindIStrHaystackOff]
    jmp For1Test

    // p1++) /* reordered: the increment sits ahead of the test, as C lays it out */
For1Increment:
    inc edi

    // ((p1 = FindIChar(p1, *Needle)) != NULL);
For1Test:
    movzx eax,BYTE PTR [esi]
    push eax
    push edi
    call FindIChar
    add esp,8
    mov edi,eax
    // RetVal is still NULL at this point. Re-zero ecx instead of trusting it
    // to survive the calls: FindIChar and CmpNIStr are not visible here, and
    // ecx is a volatile register under the cdecl convention used to call them.
    xor ecx,ecx
    test edi,edi
    jz For1Exit

    // if2 (CmpNIStr(p1, Needle, i) == 0)
    push ebx
    push esi
    push edi
    call CmpNIStr
    add esp,12
    test eax,eax
    jnz For1Increment

    // {
    // RetVal = p1;
    // break;   (falls straight through to the exit labels)
    // }
    mov ecx,edi

For1Exit:
Else1Exit:
    // return RetVal;
    mov eax,ecx
    pop edx
    pop ecx
    pop ebx
    pop edi
    pop esi
    ret 0
}
// End of ASM code
}
// End of C code
--------------------------------------------------------------------------------
// This is the resultant .COD file code
PUBLIC _FindIStr
; Function compile flags: /Odtp
_TEXT SEGMENT
_Haystack$ = 8 ; size = 4
_Needle$ = 12 ; size = 4
_FindIStr PROC
; 2636 :
; 2637 :
; 2638 :
; 2639 :
; 2640 : // This is the C code to be converted to asm
; 2641 : // Naked char *FindIStr(char *Haystack, char *Needle)
; 2642 : // {
; 2643 : // register char *p1;
; 2644 : // register size_t i;
; 2645 : // char *RetVal = NULL;
; 2646 : //
; 2647 : // if1 ((Haystack == NULL) || (Needle == NULL)) /* Nothing */;
; 2648 : // else1
; 2649 : // for1 ((i = FindLength(Needle)), (p1 = Haystack);
; 2650 : // ((p1 = FindIChar(p1, *Needle)) != NULL);
; 2651 : // p1++)
; 2652 : // if2 (CmpNIStr(p1, Needle, i) == 0)
; 2653 : // {
; 2654 : // RetVal = p1;
; 2655 : // break;
; 2656 : // }
; 2657 : //
; 2658 : // return RetVal;
; 2659 : // }
; 2660 :
; 2661 :
; 2662 :
; 2663 :
; 2664 : /* This is the converted asm code
; 2665 : __asm
; 2666 : {
; 2667 : ; esi = Needle
; 2668 : ; edi = p1 = scan Haystack
; 2669 : ; ebx = i = FindLength(Needle)
; 2670 : ; ecx = RetVal
; 2671 : ; edx = d = *String
; 2672 : #define FindIStrPushSize (5 * (TYPE __int32))
; 2673 : #define FindIStrIPOff (FindIStrPushSize)
; 2674 : #define FindIStrHaystackOff (FindIStrIPOff + (TYPE __int32))
; 2675 : #define FindIStrNeedleOff (FindIStrHaystackOff + (TYPE __int32))
; 2676 : push esi
; 2677 : push edi
; 2678 : push ebx
; 2679 : push ecx
; 2680 : push edx
; 2681 : xor ecx,ecx
; 2682 : ;If1TestA:
; 2683 : cmp [esp + FindIStrHaystackOff],0
; 2684 : ; jz If1Body
; 2685 : jz Else1Exit
; 2686 : ;If1TestB:
; 2687 : cmp [esp + FindIStrNeedleOff],0
; 2688 : ; jnz Else1Body
; 2689 : jz Else1Exit
; 2690 : ;If1Body:
; 2691 : ; jmp Else1Exit
; 2692 : ;If1Exit:
; 2693 : ;Else1Body:
; 2694 : ;For1InitA:
; 2695 : mov esi,[esp + FindIStrNeedleOff]
; 2696 : mov ebx,esi
; 2697 : FindLength(ebx)
; 2698 : ;For1InitB:
; 2699 : mov edi,[esp + FindIStrHaystackOff]
; 2700 : jmp For1Test
; 2701 : For1Increment:
; 2702 : inc edi
; 2703 : For1Test:
; 2704 : movzx eax,BYTE PTR [esi]
; 2705 : push eax
; 2706 : push edi
; 2707 : call FindIChar
; 2708 : add esp,8
; 2709 : mov edi,eax
; 2710 : test edi,edi
; 2711 : ; jnz For1Body
; 2712 : ; jmp For1Exit
; 2713 : je For1Exit
; 2714 : ;For1Body:
; 2715 : ;If2Test:
; 2716 : push ebx
; 2717 : push esi
; 2718 : push edi
; 2719 : call CmpNIStr
; 2720 : add esp,12
; 2721 : cmp eax,0
; 2722 : ; jz If2Body
; 2723 : ; jmp If2Exit
; 2724 : jnz If2Exit
; 2725 : ;If2Body:
; 2726 : mov ecx,edi
; 2727 : jmp For1exit
; 2728 : If2Exit:
; 2729 : jmp For1Increment
; 2730 : For1Exit:
; 2731 : Else1Exit:
; 2732 : mov eax,ecx
; 2733 : pop edx
; 2734 : pop ecx
; 2735 : pop ebx
; 2736 : pop edi
; 2737 : pop esi
; 2738 : ret 0
; 2739 : }
; 2740 : }
; 2741 : */
; 2742 :
; 2743 :
; 2744 :
; 2745 :
; 2746 : // This is the combined C and converted asm code - the actual function definition.
; 2747 : // Naked char *FindIStr(char *Haystack, char *Needle)
; 2748 : // {
; 2749 : __asm
; 2750 : {
; 2751 : ; esi = Needle
; 2752 : ; edi = p1 = scan Haystack
; 2753 : ; ebx = i = FindLength(Needle)
; 2754 : ; ecx = RetVal
; 2755 : ; edx = d = *String
; 2756 : #define FindIStrPushSize (5 * (TYPE __int32))
; 2757 : #define FindIStrIPOff (FindIStrPushSize)
; 2758 : #define FindIStrHaystackOff (FindIStrIPOff + (TYPE __int32))
; 2759 : #define FindIStrNeedleOff (FindIStrHaystackOff + (TYPE __int32))
; 2760 : push esi
024e0 56 push esi
; 2761 : push edi
024e1 57 push edi
; 2762 : push ebx
024e2 53 push ebx
; 2763 : push ecx
024e3 51 push ecx
; 2764 : push edx
024e4 52 push edx
; 2765 : // register char *p1;
; 2766 : // register size_t i;
; 2767 : // char *RetVal = NULL;
; 2768 : xor ecx,ecx
024e5 33 c9 xor ecx, ecx
; 2769 : //
; 2770 : // if1 ((Haystack == NULL) || (Needle == NULL)) /* Nothing */;
; 2771 : ;If1TestA:
; 2772 : cmp [esp + FindIStrHaystackOff],0
024e7 80 7c 24 18 00 cmp BYTE PTR [esp+24], 0
; 2773 : ; jz If1Body
; 2774 : jz Else1Exit
024ec 74 54 je SHORT $Else1Exit$61281
; 2775 : ;If1TestB:
; 2776 : cmp [esp + FindIStrNeedleOff],0
024ee 80 7c 24 1c 00 cmp BYTE PTR [esp+28], 0
; 2777 : ; jnz Else1Body
; 2778 : jz Else1Exit
024f3 74 4d je SHORT $Else1Exit$61281
; 2779 : ;If1Body:
; 2780 : ; jmp Else1Exit
; 2781 : ;If1Exit:
; 2782 : // else1
; 2783 : ;Else1Body:
; 2784 : // for1 ((i = FindLength(Needle)), (p1 = Haystack);
; 2785 : ;For1InitA:
; 2786 : mov esi,[esp + FindIStrNeedleOff]
024f5 8b 74 24 1c mov esi, DWORD PTR [esp+28]
; 2787 : mov ebx,esi
024f9 8b de mov ebx, esi
; 2788 : FindLength(ebx)
024fb 53 push ebx
024fc 8d 64 24 00 npad 4
02500 80 3b 00 cmp BYTE PTR [ebx], 0
02503 0f 84 03 00 00
00 je $+9
02509 43 inc ebx
0250a eb f4 jmp SHORT $+-10
0250c 2b 1c 24 sub ebx, DWORD PTR [esp]
0250f 83 c4 04 add esp, 4
; 2789 : ;For1InitB:
; 2790 : mov edi,[esp + FindIStrHaystackOff]
02512 8b 7c 24 18 mov edi, DWORD PTR [esp+24]
; 2791 : jmp For1Test
02516 eb 01 jmp SHORT $For1Test$61282
$For1Increment$61283:
; 2792 : // p1++) /* re=ordered the elements as C does */
; 2793 : For1Increment:
; 2794 : inc edi
02518 47 inc edi
$For1Test$61282:
; 2795 : // ((p1 = FindIChar(p1, *Needle)) != NULL);
; 2796 : For1Test:
; 2797 : movzx eax,BYTE PTR [esi]
02519 0f b6 06 movzx eax, BYTE PTR [esi]
; 2798 : push eax
0251c 50 push eax
; 2799 : push edi
0251d 57 push edi
; 2800 : call FindIChar
0251e e8 00 00 00 00 call _FindIChar
; 2801 : add esp,8
02523 83 c4 08 add esp, 8
; 2802 : mov edi,eax
02526 8b f8 mov edi, eax
; 2803 : test edi,edi
02528 85 ff test edi, edi
; 2804 : ; jnz For1Body
; 2805 : ; jmp For1Exit
; 2806 : je For1Exit
0252a 74 16 je SHORT $For1Exit$61284
; 2807 : ;For1Body:
; 2808 : // if2 (CmpNIStr(p1, Needle, i) == 0)
; 2809 : ;If2Test:
; 2810 : push ebx
0252c 53 push ebx
; 2811 : push esi
0252d 56 push esi
; 2812 : push edi
0252e 57 push edi
; 2813 : call CmpNIStr
0252f e8 00 00 00 00 call _CmpNIStr
; 2814 : add esp,12
02534 83 c4 0c add esp, 12 ; 0000000cH
; 2815 : cmp eax,0
02537 83 f8 00 cmp eax, 0
; 2816 : ; jz If2Body
; 2817 : ; jmp If2Exit
; 2818 : jnz If2Exit
0253a 75 04 jne SHORT $If2Exit$61285
; 2819 : // {
; 2820 : ;If2Body:
; 2821 : // RetVal = p1;
; 2822 : mov ecx,edi
0253c 8b cf mov ecx, edi
; 2823 : // break;
; 2824 : jmp For1exit
0253e eb 02 jmp SHORT $For1exit$61286
$If2Exit$61285:
; 2825 : // }
; 2826 : If2Exit:
; 2827 : jmp For1Increment
02540 eb d6 jmp SHORT $For1Increment$61283
$For1exit$61286:
$For1Exit$61284:
$Else1Exit$61281:
; 2828 : For1Exit:
; 2829 : Else1Exit:
; 2830 : //
; 2831 : // return RetVal;
; 2832 : // }
; 2833 : mov eax,ecx
02542 8b c1 mov eax, ecx
; 2834 : pop edx
02544 5a pop edx
; 2835 : pop ecx
02545 59 pop ecx
; 2836 : pop ebx
02546 5b pop ebx
; 2837 : pop edi
02547 5f pop edi
; 2838 : pop esi
02548 5e pop esi
; 2839 : ret 0
02549 c3 ret 0
_FindIStr ENDP
Dave.
Dave, excellent technique. It's been a while since I've seen this. Question: how do you handle various topics, such as multi-dimensional arrays, or creating and using database indexes using various active nodes? Or do you just go from C app to C app and try to find your learned-ed-ed code when needed for reference?
Z,
I have not really worked with C++ much, only C. Double indexing I try to do with table lookup (to avoid multiply) whenever possible. Have not worked with databases, but getting a .COD file from compiled C++ code should show what indexing is necessary. This however requires that the database format doesn't change, otherwise a new compile and then reassembly is required - not too dynamic.
Note in the C code conversion, I include the C code as commentary, and then comment out and change the ugly conditional jumps around a JMP to clean it up. The primary thing is to get the highest used variables into a register and keep them there. If you need to pass input parameters to subsequent functions, then you need to save the ESP at entry (I.E. in EBP) - otherwise indexing via ESP changes as you push the call parameters.
Dave.
Thanks Dave
Steve
Steve,
Be sure to do a compile and check the .COD file to see what unexpected things the compiler does for Protected Mode calls like FormatMessageW - things that the API documentation description does not even talk about.
Dave.