News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Writing a dissassembler

Started by MusicalMike, October 22, 2005, 10:15:46 PM

Previous topic - Next topic

MusicalMike

I am currently writing a disassembler in C++. I am at one particular point, and I am just going to admit now I am stumped on how to proceed. I would use the MSDN Board, but Microsoft is notorious for having an extreme dislike for disassemblers. The code I have written is as follows.


#pragma once
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <windows.h>

using namespace std;

//Binary Lookup Table
//256 entries, indexed by byte value (0-255). Entry N is the 8-character binary
//representation of N, most significant bit first (e.g. lookUpTable[5] is "00000101").
//NOTE(review): the entries are string literals, which are const in C++; the element
//type should be const char * rather than char *.
char * lookUpTable[] = {
"00000000","00000001","00000010","00000011",
"00000100","00000101","00000110","00000111",
"00001000","00001001","00001010","00001011",
"00001100","00001101","00001110","00001111",
"00010000","00010001","00010010","00010011",
"00010100","00010101","00010110","00010111",
"00011000","00011001","00011010","00011011",
"00011100","00011101","00011110","00011111",
"00100000","00100001","00100010","00100011",
"00100100","00100101","00100110","00100111",
"00101000","00101001","00101010","00101011",
"00101100","00101101","00101110","00101111",
"00110000","00110001","00110010","00110011",
"00110100","00110101","00110110","00110111",
"00111000","00111001","00111010","00111011",
"00111100","00111101","00111110","00111111",
"01000000","01000001","01000010","01000011",
"01000100","01000101","01000110","01000111",
"01001000","01001001","01001010","01001011",
"01001100","01001101","01001110","01001111",
"01010000","01010001","01010010","01010011",
"01010100","01010101","01010110","01010111",
"01011000","01011001","01011010","01011011",
"01011100","01011101","01011110","01011111",
"01100000","01100001","01100010","01100011",
"01100100","01100101","01100110","01100111",
"01101000","01101001","01101010","01101011",
"01101100","01101101","01101110","01101111",
"01110000","01110001","01110010","01110011",
"01110100","01110101","01110110","01110111",
"01111000","01111001","01111010","01111011",
"01111100","01111101","01111110","01111111",
"10000000","10000001","10000010","10000011",
"10000100","10000101","10000110","10000111",
"10001000","10001001","10001010","10001011",
"10001100","10001101","10001110","10001111",
"10010000","10010001","10010010","10010011",
"10010100","10010101","10010110","10010111",
"10011000","10011001","10011010","10011011",
"10011100","10011101","10011110","10011111",
"10100000","10100001","10100010","10100011",
"10100100","10100101","10100110","10100111",
"10101000","10101001","10101010","10101011",
"10101100","10101101","10101110","10101111",
"10110000","10110001","10110010","10110011",
"10110100","10110101","10110110","10110111",
"10111000","10111001","10111010","10111011",
"10111100","10111101","10111110","10111111",
"11000000","11000001","11000010","11000011",
"11000100","11000101","11000110","11000111",
"11001000","11001001","11001010","11001011",
"11001100","11001101","11001110","11001111",
"11010000","11010001","11010010","11010011",
"11010100","11010101","11010110","11010111",
"11011000","11011001","11011010","11011011",
"11011100","11011101","11011110","11011111",
"11100000","11100001","11100010","11100011",
"11100100","11100101","11100110","11100111",
"11101000","11101001","11101010","11101011",
"11101100","11101101","11101110","11101111",
"11110000","11110001","11110010","11110011",
"11110100","11110101","11110110","11110111",
"11111000","11111001","11111010","11111011",
"11111100","11111101","11111110","11111111",
};

//Reads a file name from standard input, maps that file read-only into memory,
//walks its PE section headers, prints every section name, and prints one
//binary string per byte for the section selected by the name test below.
//Always returns 0, including on failure.
//Comments in this listing follow the code they describe.
int main()
{
HANDLE hFile;
HANDLE hFileMapping;
LPVOID lpFileBase;
//Handles needed for loading and mapping a file into memory

char filename[100];

cin >> filename;
//Get the name of the file to disassemble.
//NOTE(review): the read is unbounded; a token of 100 or more characters overflows filename.

hFile = CreateFile(filename, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
//Open the existing exe file for reading

if (hFile == INVALID_HANDLE_VALUE)
{   
cout << "Couldn't open file." << endl;
return 0;
}
//Bail out if the file could not be opened

hFileMapping = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
if (hFileMapping == 0)
{   
CloseHandle(hFile);
cout << "Couldn't open file mapping." << endl;
return 0;
}
//Create the read-only file-mapping object that MapViewOfFile needs below

lpFileBase = MapViewOfFile(hFileMapping, FILE_MAP_READ, 0, 0, 0);
if (lpFileBase == 0)
{
CloseHandle(hFileMapping);
CloseHandle(hFile);
cout << "Couldn't map view of file." << endl;
return 0;
}
//Map the file into memory; lpFileBase is the address of the first byte of the view

PIMAGE_DOS_HEADER DOSHeader = (PIMAGE_DOS_HEADER)lpFileBase;
PIMAGE_NT_HEADERS NTHeaders = (PIMAGE_NT_HEADERS)(reinterpret_cast<BYTE *>(DOSHeader) + DOSHeader->e_lfanew);
PIMAGE_SYMBOL SymbolTable = (PIMAGE_SYMBOL)NTHeaders->FileHeader.PointerToSymbolTable;
PIMAGE_SECTION_HEADER SectionTable = (PIMAGE_SECTION_HEADER)(reinterpret_cast<BYTE *>(NTHeaders) + sizeof(IMAGE_NT_HEADERS));
//Overlay the Win32 PE structures on the mapped bytes.
//NOTE(review): neither e_magic nor the NT signature is checked before these are used.
//NOTE(review): SymbolTable is a file offset cast straight to a pointer, and it is never used below.
//NOTE(review): SectionTable assumes the section headers start exactly sizeof(IMAGE_NT_HEADERS)
//bytes after the NT headers; presumably FileHeader.SizeOfOptionalHeader should be used - confirm.

for (int i = 0; i < NTHeaders->FileHeader.NumberOfSections; i++)
{
char * c = reinterpret_cast<char *>(SectionTable[i].Name);

cout << c << endl;
//Print name of section to console

if (strlen(c) == 5 && c[1] == 't')
{
//Intended to select ".text"; as written it accepts any 5-character name whose second character is 't'.
c = reinterpret_cast<char *>(DOSHeader + SectionTable[i].VirtualAddress);
//No point in wasting four more bytes on another char pointer when c works perfectly fine.
//NOTE(review): DOSHeader is a PIMAGE_DOS_HEADER, so this addition advances by
//VirtualAddress * sizeof(IMAGE_DOS_HEADER) bytes rather than VirtualAddress bytes.
//NOTE(review): VirtualAddress is not a file offset; the section header's PointerToRawData
//field gives the location of the raw bytes in the file.

for (int j = 0; j < SectionTable[i].SizeOfRawData; j++)
{
if (c[j] > -1)
{
cout << lookUpTable[c[j]] << endl;
//Iterate through with the lookup table and print the string representation of the binary value
}
else
{
cout << "-1" << endl;
//Reached for every byte from 0x80 to 0xFF when plain char is signed: those bytes
//read back as negative values, so they cannot be used as a table index here.
}
}
}
}
//Iterate through the section headers in the exe file and extract data from them

Sleep(5000);
//Pause for 5 seconds so we have a chance to get a glimpse of the output

UnmapViewOfFile(lpFileBase);
CloseHandle(hFileMapping);
CloseHandle(hFile);
//Release resources used by process

return 0;
}



I want to load the binary values of each byte of the .text section of an exe into a string, and use a lookup table to produce the corresponding assembly code. I must admit though, I have no idea if the code above is in fact doing what I am trying to make it do. Note that I for some reason have to test for -1 (typically used for end of file). The thing about this is, during several test runs of the current code, -1 (end of file) has been encountered several times in the middle of the byte stream. I know this is not a C++ forum, but please try to bear with me. Any help on this would be appreciated. (I am aware that the .text section of the exe has references to things defined in other sections like the import table, however I would like to tackle one thing at a time.)

Please bear in mind that I have limited experience with binary file formats.

Thanks in advance.

tenkey

VC treats plain char as signed char. Most (not all) C and C++ compilers do this. The result is that character values from 128 to 255 are actually converted, when an int is needed, to the range -128 to -1.
A programming language is low level when its programs require attention to the irrelevant.
Alan Perlis, Epigram #8

MusicalMike

Yes I am aware of this, what I need to know is how to be sure that the current code is actually reading data from the .text section of the exe and not other sections.

tenkey

Use DUMPBIN or an equivalent to check your data. If the executable is small enough, you can also inspect it with DEBUG.

You say you are aware of signed characters, but you don't say how you will correct your handling of them.
A programming language is low level when its programs require attention to the irrelevant.
Alan Perlis, Epigram #8

MusicalMike

I updated the code to deal with the unsigned char problem. I realized that I had neglected to write the code to accommodate this. Here's the updated section.



//Revised section loop: prints every section name and, for the section whose name
//passes the test below, prints one binary string per byte (plus the character
//itself when it is printable). Bytes are now read through an unsigned char pointer
//so every value 0-255 is a valid lookup-table index.
for (int i = 0; i < NTHeaders->FileHeader.NumberOfSections; i++)
{
char * c = reinterpret_cast<char *>(SectionTable[i].Name);

cout << c << endl;
//Print name of section to console

if (c[1] == 't' && c[2] == 'e' && c[3] == 'x' && c[4] == 't')
{
//Only characters 1-4 are compared; c[0] and anything after c[4] are not checked.
unsigned char * c2 = reinterpret_cast<BYTE *>(DOSHeader + SectionTable[i].VirtualAddress);
//NOTE(review): DOSHeader is still a PIMAGE_DOS_HEADER here, so the offset is scaled by
//sizeof(IMAGE_DOS_HEADER), and VirtualAddress is not a file offset.

try
{
for (int j = 0; j < SectionTable[i].SizeOfRawData; ++j)
{
cout << lookUpTable[c2[j]];
if (isprint(c2[j]))
{
cout << "    " << c2[j];
}
cout << endl;
//Iterate through with the lookup table and print the string representation of the binary value
}
}
catch (...)
{
cout << "-1" << endl;
//NOTE(review): reading outside the mapped view raises an access violation rather than
//a C++ exception; whether catch (...) sees it depends on compiler settings - confirm.
}
}
}



However when I tried it on the program shown below,



.486
.model flat, stdcall

.code
Start:

xor eax, eax

End Start



The dump it produced consisted of over one hundred lines of binary strings. Obviously, since there is only one executable instruction in the whole program, I was expecting only four byte strings to be displayed; therefore I am not looking at the .text section's code. This is where I am out of ideas. Thanks in advance for the help.

tenkey

The virtual address is not a file offset.

There is a file offset field (PointerToRawData) in the section header for calculating the location of the raw data bytes.
If you're getting hundreds of bytes, then your SectionTable pointer isn't set correctly either. Or else you are unknowingly adding library code. You have VC, use DUMPBIN to inspect your executable.

You really need to use a file base pointer that has a subtype of BYTE or unsigned char. Otherwise, you'll keep forgetting to compensate for "pointer scaling" when you use the + operator with file offsets.
A programming language is low level when its programs require attention to the irrelevant.
Alan Perlis, Epigram #8

MusicalMike

I'll make sure all my pointer arithmetic is done on byte pointers; however, when I use the PointerToRawData field, I usually get a GPF (General Protection Fault). If there is a pointer scaling problem in the code, it explains this anomaly. Thanks.

MusicalMike

I tried using the following line

PIMAGE_DOS_HEADER DOSHeader = (PIMAGE_DOS_HEADER)(reinterpret_cast<BYTE *>(lpFileBase);

This effectively broke the code.

Trying to access the PointerToRawData still causes access violation errors. Could you clarify what you meant by your previous post?

hutch--

Mike,

Have a look at this basic capacity for loading a PE file. It has the advantage of placing the sections in their loaded memory position rather than in disk image order and alignment. Run it with a simple exe file and then open the result in a hex editor and you will see the sections tend to align on 1000 hex boundaries.


; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
    include \masm32\include\masm32rt.inc
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««

comment * -----------------------------------------------------
                        Build this  template with
                       "CONSOLE ASSEMBLE AND LINK"
        ----------------------------------------------------- *

    ; Uninitialised data section.
    ; NOTE(review): `value` is not referenced anywhere in the code shown.
    .data?
      value dd ?

    ; Initialised data section.
    ; NOTE(review): `item` is not referenced anywhere in the code shown.
    .data
      item dd 0

    .code

; Program entry point (named by the `end start` directive at the end of the listing).
start:
   
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««

    call main               ; all of the work happens in the main procedure

    exit                    ; presumably the MASM32 exit macro, which ends the process - confirm

; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««

; main - maps "timer2.exe" as an executable image and writes the first 16384
;        bytes of the mapped view to "result.bin".
; NOTE(review): none of the return values are checked, and neither the view
; (pDat) nor the mapping handle (hObj) is released in the code shown.
main proc

    LOCAL hFile :DWORD      ; handle of the input file
    LOCAL hObj  :DWORD      ; handle of the file-mapping object
    LOCAL pDat  :DWORD      ; base address of the mapped view

    ; presumably the MASM32 fopen macro, which opens an existing file - confirm
    mov hFile, fopen("timer2.exe")
    ; SEC_IMAGE asks Windows to map the file as an executable image, which is what
    ; places each section at its loaded position instead of its on-disk position
    mov hObj, rv(CreateFileMapping,hFile,NULL,PAGE_READWRITE or SEC_IMAGE,0,0,NULL)

    ; map the whole object (offset 0, length 0) and keep the base address of the view
    mov pDat, rv(MapViewOfFile,hObj,FILE_MAP_WRITE,0,0,0)

    ; write 16384 bytes starting at pDat to result.bin; the hard-coded size assumes
    ; the mapped image is at least that large - TODO confirm for other input files.
    ; presumably OutputFile leaves its result in eax, making this "test eax, eax";
    ; the resulting flags are not used by the code shown
    test OutputFile("result.bin",pDat,16384), eax

    fclose hFile            ; close the input file

    ret

main endp

; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««

end start
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

MusicalMike

Hutch, your a life saver, I have been agonizing over this for weeks. :clap:

tenkey

Quote from: MusicalMike on October 26, 2005, 02:02:12 AM
I tried using the following line

PIMAGE_DOS_HEADER DOSHeader = (PIMAGE_DOS_HEADER)(reinterpret_cast<BYTE *>(lpFileBase);

This effectively broke the code.

Trying to access the PointerToRawData still causes access violation errors. Could you clarify what you meant by your previous post?

The code didn't break - it's still broken. DOSHeader is not a BYTE pointer. Any attempt to access data with (DOSHeader + some_offset) will fail to work properly. *(DOSHeader + i) is the same as DOSHeader[i], so the value in i will be multiplied by sizeof(IMAGE_DOS_HEADER) before being added to the base address stored in DOSHeader. As you did elsewhere, you need to use reinterpret_cast<BYTE *>(DOSHeader) before adding any offset values you find or calculate.

If you define lpFileBase as a BYTE*, then all you need to do is find or calculate file offsets, and then recast at the last minute.

struct_ptr = reinterpret_cast<Whatever *>(lpFileBase + file_offset);

(reinterpret_cast<Whatever *>(lpFileBase + file_offset)) -> field

This, of course, assumes that lpFileBase actually points to the beginning of the file image. (I wouldn't know, I don't use file mapping.)
A programming language is low level when its programs require attention to the irrelevant.
Alan Perlis, Epigram #8

MusicalMike

Thanks for everything, After playing around with it for a while, the code seems to work fine now.

The "working" code is listed below.


#pragma once
#include <stdlib.h>
#include <stdio.h>
#include <cctype>
#include <iostream>
#include <fstream>
#include <string>
#include <windows.h>

using namespace std;

//Binary Lookup Table
//256 entries, indexed by byte value (0-255). Entry N is the 8-character binary
//representation of N, most significant bit first (e.g. lookUpTable[5] is "00000101").
//The elements point to const char because string literals must not be modified;
//always index with an unsigned byte value so the index cannot be negative.
const char * lookUpTable[] = {
"00000000","00000001","00000010","00000011",
"00000100","00000101","00000110","00000111",
"00001000","00001001","00001010","00001011",
"00001100","00001101","00001110","00001111",
"00010000","00010001","00010010","00010011",
"00010100","00010101","00010110","00010111",
"00011000","00011001","00011010","00011011",
"00011100","00011101","00011110","00011111",
"00100000","00100001","00100010","00100011",
"00100100","00100101","00100110","00100111",
"00101000","00101001","00101010","00101011",
"00101100","00101101","00101110","00101111",
"00110000","00110001","00110010","00110011",
"00110100","00110101","00110110","00110111",
"00111000","00111001","00111010","00111011",
"00111100","00111101","00111110","00111111",
"01000000","01000001","01000010","01000011",
"01000100","01000101","01000110","01000111",
"01001000","01001001","01001010","01001011",
"01001100","01001101","01001110","01001111",
"01010000","01010001","01010010","01010011",
"01010100","01010101","01010110","01010111",
"01011000","01011001","01011010","01011011",
"01011100","01011101","01011110","01011111",
"01100000","01100001","01100010","01100011",
"01100100","01100101","01100110","01100111",
"01101000","01101001","01101010","01101011",
"01101100","01101101","01101110","01101111",
"01110000","01110001","01110010","01110011",
"01110100","01110101","01110110","01110111",
"01111000","01111001","01111010","01111011",
"01111100","01111101","01111110","01111111",
"10000000","10000001","10000010","10000011",
"10000100","10000101","10000110","10000111",
"10001000","10001001","10001010","10001011",
"10001100","10001101","10001110","10001111",
"10010000","10010001","10010010","10010011",
"10010100","10010101","10010110","10010111",
"10011000","10011001","10011010","10011011",
"10011100","10011101","10011110","10011111",
"10100000","10100001","10100010","10100011",
"10100100","10100101","10100110","10100111",
"10101000","10101001","10101010","10101011",
"10101100","10101101","10101110","10101111",
"10110000","10110001","10110010","10110011",
"10110100","10110101","10110110","10110111",
"10111000","10111001","10111010","10111011",
"10111100","10111101","10111110","10111111",
"11000000","11000001","11000010","11000011",
"11000100","11000101","11000110","11000111",
"11001000","11001001","11001010","11001011",
"11001100","11001101","11001110","11001111",
"11010000","11010001","11010010","11010011",
"11010100","11010101","11010110","11010111",
"11011000","11011001","11011010","11011011",
"11011100","11011101","11011110","11011111",
"11100000","11100001","11100010","11100011",
"11100100","11100101","11100110","11100111",
"11101000","11101001","11101010","11101011",
"11101100","11101101","11101110","11101111",
"11110000","11110001","11110010","11110011",
"11110100","11110101","11110110","11110111",
"11111000","11111001","11111010","11111011",
"11111100","11111101","11111110","11111111",
};

int main()
{
HANDLE hFile;
HANDLE hFileMapping;
BYTE * lpFileBase;
//Necessary handles for loading and maping a file into memory

char filename[100];

cin >> filename;
//get name of file to disassemble.

hFile = CreateFile(filename, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
//We want to open an exe file

if (hFile == INVALID_HANDLE_VALUE)
{   
cout << "Couldn't open file." << endl;
return 0;
}
//We also want to make sure nothing went wrong

hFileMapping = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
if (hFileMapping == 0)
{   
CloseHandle(hFile);
cout << "Couldn't open file mapping." << endl;
return 0;
}
//Not quite sure what this step is for, but it seems to be necessary.

lpFileBase = (BYTE *)MapViewOfFile(hFileMapping, FILE_MAP_READ, 0, 0, 0);
if (lpFileBase == 0)
{
CloseHandle(hFileMapping);
CloseHandle(hFile);
cout << "Couldn't map view of file." << endl;
return 0;
}
//Find the beginning of the file in memory

PIMAGE_DOS_HEADER DOSHeader = (PIMAGE_DOS_HEADER)(lpFileBase);
PIMAGE_NT_HEADERS NTHeaders = (PIMAGE_NT_HEADERS)(lpFileBase + DOSHeader->e_lfanew);
PIMAGE_SYMBOL SymbolTable = (PIMAGE_SYMBOL)NTHeaders->FileHeader.PointerToSymbolTable;
PIMAGE_SECTION_HEADER SectionTable = (PIMAGE_SECTION_HEADER)(reinterpret_cast<BYTE *>(NTHeaders) + sizeof(IMAGE_NT_HEADERS));
//Load portions of the file into their respective Win32 API Data structures

for (int i = 0; i < NTHeaders->FileHeader.NumberOfSections; i++)
{
try
{
BYTE * c = reinterpret_cast<BYTE *>(SectionTable[i].Name);

ofstream fout;
//Create file stream

char * outname = strcat(filename, ".dmp");
fout.open(outname, ios_base::out);
//Open file

fout << c << endl;
//Write name of section to file

if (c[1] == 't' && c[2] == 'e' && c[3] == 'x' && c[4] == 't')
{
BYTE * c2 = reinterpret_cast<BYTE *>(lpFileBase + SectionTable[i].PointerToRawData);

for (DWORD j = 0; j < SectionTable[i].SizeOfRawData; j++)
{
fout << lookUpTable[c2[j]];
if (isprint(c2[j]))
{
fout << '\t' << (char)(c2[j]);
}
fout << endl;
//Itterate through with lookup table and write string representation of binary value to file
}
}
fout.close();
//Close file stream
}
catch (...)
{
cout << strerror(GetLastError()) << endl;
}
}
//Itterate through the Section headers in the exe file and extract data from them

cout << "Dump Created Successfully" << endl;

UnmapViewOfFile(lpFileBase);
CloseHandle(hFileMapping);
CloseHandle(hFile);
//Release resources used by process

return 0;
}

OceanJeff32

Oh yeah??? Right before Halloween? How goes the disassembler?

Just curious,

jeff c
:U
Any good programmer knows, every large and/or small job, is equally large, to the programmer!

MusicalMike

Now that I got this out of the way, its going great.

Randall Hyde

Quote from: MusicalMike on October 22, 2005, 10:15:46 PM
I am currently writing a disassembler in C++. I am at one particular point, and I am just going to admit now I am stumped on how to proceed. I would use the MSDN Board, but Microsoft is notorious for having an extreme dislike for disassemblers. The code I have written is as follows.


Thanks in advance.

BTW, have you looked at the disassembler engine source code that ships with OllyDbg?
Cheers,
Randy Hyde