comment ~
This is an MD5 hasher for files just to show how you can avoid repeated calls to other
routines.
This doesn't use MD5Init/MD5Update or MD5Final..just the MD5Transform routine, which
makes it significantly faster than some other md5 checksum utilities.
There may be errors in the code, but i've not found any so far.let me know.
i'm mainly posting this, because i'd like to know of more efficient way to
prepare a data block before processing with MD5Transform.
any suggestions/comments are welcome.
in particular, if you can think of more efficient way to calculate
bits, append 64-bit length, i'd be interested to know.
actually, there is no calculation of 64-bit length yet, just 32-bit.
also, would it be better to read a file in blocks, rather than in one attempt?
because it has problems with large files of say 80 Megs, because of GlobalAlloc
I know there has been talk of MD5 being "broken", but this is mainly just an excercise for me.
~
.686
.mmx
.model flat,stdcall
include <windows.inc>
include <kernel32.inc>
include <msvcrt.inc>
include <macros.asm>
MDFile PROTO :DWORD
MD5Transform PROTO :DWORD, :DWORD, :DWORD
main PROTO :DWORD,:DWORD
.data
bWildCard DWORD ?
pEnv DWORD ? ; environment variables
pArgv DWORD ? ; arguement vectors
nArgc DWORD ? ; arguement count
stinfo STARTUPINFO <?>
.code
start:
mov bWildCard, TRUE ; incase we want to process more than 1 file
invoke crt___getmainargs,addr nArgc,addr pArgv,addr pEnv,bWildCard,addr stinfo
invoke main,nArgc,pArgv
exit
main PROC argc:DWORD, argv:DWORD
mov esi, [argv]
mov edi, [argc]
.WHILE (edi > 1)
dec edi
invoke MDFile,[esi + 4 * edi]
.ENDW
ret
main ENDP
MDFile PROC USES EAX EBX ECX EDX EDI ESI file_name:DWORD
LOCAL md5_context[4] :DWORD
LOCAL num_of_blocks :DWORD
LOCAL input_data :DWORD
LOCAL file_handle :DWORD
LOCAL file_size :DWORD
LOCAL bytes_read :DWORD
invoke CreateFile,[file_name],GENERIC_READ,FILE_SHARE_READ,NULL,
OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL
mov [file_handle], eax
.IF( [file_handle] != INVALID_HANDLE_VALUE )
invoke GetFileSize,[file_handle],NULL
mov [file_size], eax
.IF( [file_size] != NULL )
lea ecx, [eax + 128]
invoke GlobalAlloc,GMEM_ZEROINIT,ecx
mov [input_data], eax
.IF ( [input_data] != NULL )
; reading in entire file not good idea?
invoke ReadFile,[file_handle],[input_data],
[file_size],addr bytes_read,NULL
mov eax, [bytes_read] ; get number of bytes read into memory
lea edx, [eax * 8] ; calculate number of bits. (only 32-bit)
mov ebx, eax ; save size in ebx
mov ecx, eax ; save size in ecx
shr eax, 6 ; get number of 64 byte blocks
mov [num_of_blocks], eax
mov edi, [input_data]
add edi, ecx ; get end of data
mov byte ptr [edi], 80h ; append end bit
and ebx, 63 ; get remainder (if any)
sub edi, ebx ; edi holds offset of last block
.IF (ebx >= 56) ; can we append bits here?
add edi, ebx ; no..
mov eax, 64
sub eax, ebx
add edi, eax
inc dword ptr [num_of_blocks]
.ENDIF
mov [edi + 14*4], edx ; store bits
lea esi, [md5_context] ; initialize md5 context
mov dword ptr[esi + (00*04)], 067452301h
mov dword ptr[esi + (01*04)], 0efcdab89h
mov dword ptr[esi + (02*04)], 098badcfeh
mov dword ptr[esi + (03*04)], 010325476h
invoke MD5Transform,addr md5_context, [input_data], [num_of_blocks]
lodsd
bswap eax
xchg eax, edx
lodsd
bswap eax
xchg eax, ebx
lodsd
bswap eax
xchg eax, ecx
lodsd
bswap eax
xchg eax, edx
invoke crt_printf,chr$(10,'MD5 ("%s") = %08X%08X%08X%08X',10),
[file_name],eax,ebx,ecx,edx
invoke GlobalFree,[input_data]
.ELSE
invoke crt_printf,chr$(10,'Error: -> GlobalAlloc()',10)
.ENDIF
.ELSE
invoke crt_printf,chr$(10,'Error: -> GetFileSize() for %s',10),[file_name]
.ENDIF
invoke CloseHandle,[file_handle]
.ELSE
invoke crt_printf,chr$(10,'Error: -> CreateFile()',10)
.ENDIF
ret
MDFile ENDP
FF MACRO dwA, dwB, dwC, dwD, dwX, rS, dwT
mov edi, dwC
xor edi, dwD
and edi, dwB
xor edi, dwD
lea dwA, [dwA + dwT + edi]
add dwA, [dwX]
rol dwA, rS
add dwA, dwB
ENDM
GG MACRO dwA, dwB, dwC, dwD, dwX, rS, dwT
mov edi, dwC
xor edi, dwB
and edi, dwD
xor edi, dwC
lea dwA, [dwA + dwT + edi]
add dwA, [dwX]
rol dwA, rS
add dwA, dwB
ENDM
HH MACRO dwA, dwB, dwC, dwD, dwX, rS, dwT
mov edi, dwC
xor edi, dwD
xor edi, dwB
lea dwA, [dwA + dwT + edi]
add dwA, [dwX]
rol dwA, rS
add dwA, dwB
ENDM
II MACRO dwA, dwB, dwC, dwD, dwX, rS, dwT
mov edi, -1
xor edi, dwD
or edi, dwB
xor edi, dwC
lea dwA, [dwA + dwT + edi]
add dwA, [dwX]
rol dwA, rS
add dwA, dwB
ENDM
MD5Transform PROC USES ESI EDI EAX EBX ECX EDX state:DWORD, block:DWORD, num_blocks:DWORD
LOCAL loop_counter :DWORD
mov esi, [block]
mov edi, [state]
mov eax, [num_blocks]
mov [loop_counter], eax
transform_block:
mov eax, dword ptr[edi + (00*04)]
mov ebx, dword ptr[edi + (01*04)]
mov ecx, dword ptr[edi + (02*04)]
mov edx, dword ptr[edi + (03*04)]
;==================================================
FF eax, ebx, ecx, edx, esi+(00*4), 07, 0d76aa478h
FF edx, eax, ebx, ecx, esi+(01*4), 12, 0e8c7b756h
FF ecx, edx, eax, ebx, esi+(02*4), 17, 0242070dbh
FF ebx, ecx, edx, eax, esi+(03*4), 22, 0c1bdceeeh
FF eax, ebx, ecx, edx, esi+(04*4), 07, 0f57c0fafh
FF edx, eax, ebx, ecx, esi+(05*4), 12, 04787c62ah
FF ecx, edx, eax, ebx, esi+(06*4), 17, 0a8304613h
FF ebx, ecx, edx, eax, esi+(07*4), 22, 0fd469501h
FF eax, ebx, ecx, edx, esi+(08*4), 07, 0698098d8h
FF edx, eax, ebx, ecx, esi+(09*4), 12, 08b44f7afh
FF ecx, edx, eax, ebx, esi+(10*4), 17, 0ffff5bb1h
FF ebx, ecx, edx, eax, esi+(11*4), 22, 0895cd7beh
FF eax, ebx, ecx, edx, esi+(12*4), 07, 06b901122h
FF edx, eax, ebx, ecx, esi+(13*4), 12, 0fd987193h
FF ecx, edx, eax, ebx, esi+(14*4), 17, 0a679438eh
FF ebx, ecx, edx, eax, esi+(15*4), 22, 049b40821h
;==================================================
GG eax, ebx, ecx, edx, esi+(01*4), 05, 0f61e2562h
GG edx, eax, ebx, ecx, esi+(06*4), 09, 0c040b340h
GG ecx, edx, eax, ebx, esi+(11*4), 14, 0265e5a51h
GG ebx, ecx, edx, eax, esi+(00*4), 20, 0e9b6c7aah
GG eax, ebx, ecx, edx, esi+(05*4), 05, 0d62f105dh
GG edx, eax, ebx, ecx, esi+(10*4), 09, 002441453h
GG ecx, edx, eax, ebx, esi+(15*4), 14, 0d8a1e681h
GG ebx, ecx, edx, eax, esi+(04*4), 20, 0e7d3fbc8h
GG eax, ebx, ecx, edx, esi+(09*4), 05, 021e1cde6h
GG edx, eax, ebx, ecx, esi+(14*4), 09, 0c33707d6h
GG ecx, edx, eax, ebx, esi+(03*4), 14, 0f4d50d87h
GG ebx, ecx, edx, eax, esi+(08*4), 20, 0455a14edh
GG eax, ebx, ecx, edx, esi+(13*4), 05, 0a9e3e905h
GG edx, eax, ebx, ecx, esi+(02*4), 09, 0fcefa3f8h
GG ecx, edx, eax, ebx, esi+(07*4), 14, 0676f02d9h
GG ebx, ecx, edx, eax, esi+(12*4), 20, 08d2a4c8ah
;==================================================
HH eax, ebx, ecx, edx, esi+(05*4), 04, 0fffa3942h
HH edx, eax, ebx, ecx, esi+(08*4), 11, 08771f681h
HH ecx, edx, eax, ebx, esi+(11*4), 16, 06d9d6122h
HH ebx, ecx, edx, eax, esi+(14*4), 23, 0fde5380ch
HH eax, ebx, ecx, edx, esi+(01*4), 04, 0a4beea44h
HH edx, eax, ebx, ecx, esi+(04*4), 11, 04bdecfa9h
HH ecx, edx, eax, ebx, esi+(07*4), 16, 0f6bb4b60h
HH ebx, ecx, edx, eax, esi+(10*4), 23, 0bebfbc70h
HH eax, ebx, ecx, edx, esi+(13*4), 04, 0289b7ec6h
HH edx, eax, ebx, ecx, esi+(00*4), 11, 0eaa127fah
HH ecx, edx, eax, ebx, esi+(03*4), 16, 0d4ef3085h
HH ebx, ecx, edx, eax, esi+(06*4), 23, 004881d05h
HH eax, ebx, ecx, edx, esi+(09*4), 04, 0d9d4d039h
HH edx, eax, ebx, ecx, esi+(12*4), 11, 0e6db99e5h
HH ecx, edx, eax, ebx, esi+(15*4), 16, 01fa27cf8h
HH ebx, ecx, edx, eax, esi+(02*4), 23, 0c4ac5665h
;==================================================
II eax, ebx, ecx, edx, esi+(00*4), 06, 0f4292244h
II edx, eax, ebx, ecx, esi+(07*4), 10, 0432aff97h
II ecx, edx, eax, ebx, esi+(14*4), 15, 0ab9423a7h
II ebx, ecx, edx, eax, esi+(05*4), 21, 0fc93a039h
II eax, ebx, ecx, edx, esi+(12*4), 06, 0655b59c3h
II edx, eax, ebx, ecx, esi+(03*4), 10, 08f0ccc92h
II ecx, edx, eax, ebx, esi+(10*4), 15, 0ffeff47dh
II ebx, ecx, edx, eax, esi+(01*4), 21, 085845dd1h
II eax, ebx, ecx, edx, esi+(08*4), 06, 06fa87e4fh
II edx, eax, ebx, ecx, esi+(15*4), 10, 0fe2ce6e0h
II ecx, edx, eax, ebx, esi+(06*4), 15, 0a3014314h
II ebx, ecx, edx, eax, esi+(13*4), 21, 04e0811a1h
II eax, ebx, ecx, edx, esi+(04*4), 06, 0f7537e82h
II edx, eax, ebx, ecx, esi+(11*4), 10, 0bd3af235h
II ecx, edx, eax, ebx, esi+(02*4), 15, 02ad7d2bbh
II ebx, ecx, edx, eax, esi+(09*4), 21, 0eb86d391h
;==================================================
mov edi, [state]
add [edi + (00*04)], eax
add [edi + (01*04)], ebx
add [edi + (02*04)], ecx
add [edi + (03*04)], edx
dec dword ptr [loop_counter]
lea esi, [esi + 64]
jns transform_block
ret
MD5Transform ENDP
END start
Hi Kernel,
I had to add the following lines to your program posted above before it would assemble and load for me. The first line replaces the 'include <macros.asm>' that you had in the original program. I'm using sp2a and I haven't made any modifications to the masm32 data. When I saw that I figured that maybe you had copied macros.asm into your /masm32/include directory. I'm glad I took the time to add the few lines. It works like a champ!
include \masm32\macros\macros.asm
includelib kernel32.lib
includelib msvcrt.lib
Having done that, I tested it against md5sums and it was, indeed, much faster. The file I tested was about 5 MB and it returned in a heartbeat whereas MD5SUMS took a second or two. Edit: I just tried to reproduce this result and MD5SUMS was also quite fast. I'm not sure why it was slower the first time I tried it. Anyway your program produced the correct result very quickly.
It would probably be better to set your program up to do asyncronous I/O using two or more buffers if you really want to gain performance on very large files. You could set up a read thread to keep filling the buffers and have your main thread do the hash as each buffer was presented.
I have some extremely large files that I could test it with later if you make allowances for large files. I generate them with my Fibonacci program and normally use MD5SUMS to verify that the output is identical to that of control runs. The largest one I've generated is about 4 GBytes and it takes MD5SUMS a while, maybe 5-10 minutes, to walk thru it and present its result.
Thanks for posting the code, I'd always been curious about how the MD5 hash worked but I had never really pursued the answer. Seeing your program helps me understand a little more about it. Looks like cryto stuff!
The routines here are not very usable since they need the whole file loaded in memory before it can be hashed. The whole point of md5_init/md5_update/md5_finish is so you can start the process with md5_init, feed more and more bytes by calling md5_update numerous times, and then finalize the process with md5_finish. That kind of flexibility allows more complex situations like using asynchronous I/O.
Phil, the reason md5sum was faster the second time was because the file contents were cached in memory by NT's cache manager. If you want true benchmarks, you should modify md5sum to open the file with FILE_FLAG_NO_BUFFERING attribute (however, that might also break md5sum if it doesn't read at sector size boundaries).
yes, i'm sorry there was no reply on this - actually meant to delete this post because the code was badly written.
How hard would it be to modify this code to hash a string instead of a file?
I use iblis's MD5 library for hashing a password string - for example like phpbb (v2) uses
http://www.asmcommunity.net/board/index.php?topic=14399.msg159208#msg159208
include md5.inc
includelib md5.lib
.data
phpmd5ctext MD5CTXT <>
phpmd5chash MD5HASH <>
.code
phpbb_MD5Hash PROC pszString:dword, pHash:dword
;uses ebx
invoke lstrlen,[pszString]
mov ebx,eax
invoke MD5_Startup
invoke MD5_Init, offset phpmd5ctext
invoke MD5_Read, offset phpmd5ctext, [pszString], ebx
invoke MD5_Digest, offset phpmd5ctext, offset phpmd5chash
invoke MD52StringA, offset phpmd5chash, [pHash], 1
ret
phpbb_MD5Hash ENDP
pszString is your password string and pHash is the hashed output of the password string. Copy md5.inc to \masm32\include and md5.lib to \masm32\lib (or wherever you install your masm32). Or you can adapt Kernel_Gaddafi's code to process a string of data as passed to a proc, instead of a file. Hope that helps
Thanks for the link. This is exactly what I was looking for! :U
I just implemented this using C++ and it seems to be extremely fast.
Out of interest, would it be even faster if used within an assembly program or would the speed be the same?
Quote from: lonewolff
Out of interest, would it be even faster if used within an assembly program or would the speed be the same?
That would depend. If you are simply calling these functions, the INVOKE is basically a call with a stack frame, and will be very similar/identical in compiled C. If you process a lot of strings, or manipulating then a prior to performing the MD5, then assembly would help. If in doubt time it. You could also use inline assembly in critical areas, or use it to access the cycle counter RDTSC.
Changing this routine to hash a string instead of a file is trivial, are you using this for a brute forcer? (is that the reason for a need of speed?)
HR,
Ghandi
Even with typical applications of hashing, signing, or fingerprinting, one typically feeds a large amount of data through MD5, or SHA, or for that manner normal RC4/DES/AES applications.
As such, any improvement in efficiency, even small ones, compound quite significantly.
Quote from: Ghandi on June 05, 2010, 02:09:00 PM
Changing this routine to hash a string instead of a file is trivial, are you using this for a brute forcer? (is that the reason for a need of speed?)
Yes, I have been interested in this lately (since I found out how easy it is to crack WEP).
This is all for learning purposes, though. It makes me a better Network Systems Administrator if I know how how easy (or hard) it is to compromise different security systems. I can then offer better advice to clients. Longer passwords WPA2 over WEP etc. :)
On my dual Xeon server I can brute force a 6 character MD5 password in about 27 minutes (with Cain).
I was hoping to multi-thread my own app to try and bring it down to five minutes using all eight cores.
If I can pull this off, I might look into Cuda so then I can use all processing cores on a crossfire system to scale the app to run on 800 or so cores.
Surely if you know you can comprimise the system in 27 minutes on a dual core xeon that is good enough to know for Network Systems Administration? Why would you need to know exactly how to crack it any faster....
Just tell your clients not to use WEP ::)
Oex, See my previous post...
Let me ask you this. Why do we program in assembly, for instance. 99% of us will never program something of commercial quality in assembly (or even c++ in that matter).
Answer this and then you have the answer to your own quetsion.
Ah yes I have reread your post but can tell you, just dont recommend WEP.... The bit that caught my attention was "It makes me a better Network Systems Administrator if I know how how easy (or hard) it is to compromise different security systems"
You are supposing in your question that you will get told the fastest method and that you will implement it properly on multi-core or even on cuda and that that is indeed the fastest way of cracking it....
You are not supposing that the user is the weak link, other implementations and hardware will likely dwarf yours or any recommendations on this forum and any advice you give as a Network Systems Administrator will stick, probably for many many years....
A quick search on the web will give you results achieved by educational and commercial bodies which will at least give a better indication than running substandard algorithms