News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Fast GetProcAddress

Started by Petroizki, March 22, 2005, 01:33:31 PM

Previous topic - Next topic

Petroizki

Since the last thread disappeared somehow, i'll repost the code.
I have made some optimizations, and next i will be making another function to get multiple addresses in one call.

Note that neither hModule nor lpProcName can be NULL. You must also use the '.686' processor directive.

The function uses my function macros to free up ebp and to access locals via esp. It also moves the registers onto the stack instead of pushing them, which seems a lot faster on my Athlon. I will eventually make a "final" version without my macros for default MASM users, but I don't currently want to maintain two versions.

; ---------------------------------------------------------------------------
; GetProcOffset
;
; Looks up an exported procedure by parsing the export directory of a module
; that is already mapped into the process. Forwarded exports
; ("NTDLL.RtlAllocateHeap", "NTDLL.#15") are followed into the target module,
; which is loaded with LoadLibraryA if GetModuleHandleA does not find it.
;
; Parameters:
;   hModule    - base address of the module image; must not be NULL
;   lpProcName - pointer to a NUL-terminated export name, or an ordinal
;                (any value whose upper WORD is zero); must not be NULL
; Returns:
;   eax = address of the procedure, or 0 when it cannot be resolved
;
; Requires the .686 directive (cmova). func / PLOCAL / fncall / endf are the
; author's own macros (not shown here); per the accompanying post they free
; ebp for general use and address the locals through esp.
;
; Register roles:
;   ebp = base of the module currently being searched
;   esi = sought name pointer or ordinal
;   edx = export directory (the name search later reuses it as a cursor)
; ---------------------------------------------------------------------------
func GetProcOffset, stdcall uses ebp ebx edi esi, MOVE, hModule:DWORD, lpProcName:DWORD
    PLOCAL dwAddrExportAddresses:DWORD  ; NOTE: holds the name pointer table address
    PLOCAL dwAddrNameAddresses:DWORD    ; NOTE: holds the export address (function) table address
    PLOCAL dwAddrNameOrdinals:DWORD     ; RVA of the name ordinal table
    PLOCAL dwAddrExportTable:DWORD      ; RVA of the export directory
    PLOCAL dwDataSize:DWORD             ; size of the export directory data

    mov ebp, dword ptr [hModule]        ; get image offset
    mov esi, dword ptr [lpProcName]     ; get proc name or ordinal value

; Re-entered for every forwarder hop with ebp = new module, esi = new name/ordinal
@get_proc:
    mov ebx, dword ptr [ebp + 3Ch]      ; get PE header offset (relative)

    ; get the export table size
    mov ecx, dword ptr [ebp + ebx.IMAGE_NT_HEADERS.OptionalHeader.DataDirectory.isize]

    ; get export table offset (relative)
    mov edx, dword ptr [ebp + ebx.IMAGE_NT_HEADERS.OptionalHeader.DataDirectory.VirtualAddress]

    mov dword ptr [dwDataSize], ecx

    test edx, edx                       ; check if there is export table
    jz @not_found                       ; jump if no address of export table

    ; save the export table address (still an RVA, used by the forwarder test)
    mov dword ptr [dwAddrExportTable], edx
    add edx, ebp                        ; edx -> export directory

    test esi, 0FFFF0000h                ; check if this is function's ordinal value, and not a name
    jz @ordinal                         ; jump if ordinal value

    mov ecx, dword ptr [edx.IMAGE_EXPORT_DIRECTORY.AddressOfNameOrdinals]
    mov eax, dword ptr [edx.IMAGE_EXPORT_DIRECTORY.AddressOfFunctions]
    ; get offset of pointer to export names
    mov ebx, dword ptr [edx.IMAGE_EXPORT_DIRECTORY.AddressOfNames]

    mov dword ptr [dwAddrNameOrdinals], ecx

    add eax, ebp                        ; eax -> export address table
    add ebx, ebp                        ; ebx -> name pointer table

    ; get number of exported names
    mov ecx, dword ptr [edx.IMAGE_EXPORT_DIRECTORY.NumberOfNames]
    stc                                 ; CF=1 so the first cmova below does nothing

    ; save address of the export address (function) table
    mov dword ptr [dwAddrNameAddresses], eax
    ; save address of the name pointer table
    mov dword ptr [dwAddrExportAddresses], ebx

; Binary search over the name pointer table.
;   ebx = start of the current window, ecx = window size (halved every pass)
;   edx = address of the entry following the one just probed
; This label is reached with the flags of the failed byte compare below:
; "above" (sought char > export char) moves the window start past the probe.
@find_name:
    cmova ebx, edx

    test ecx, ecx
    jz @not_found                       ; jump if no more export names left

    shr ecx, 1                          ; divide by 2 (binary search)
    mov esi, dword ptr [lpProcName]     ; restart at the beginning of the sought name
    lea edx, [ebx + ecx*4 + 4]          ; edx -> entry after the probed one
    mov edi, dword ptr [ebx + ecx*4]    ; RVA of the probed export name

@@: mov al, byte ptr [esi]              ; next char of the sought name
    inc esi
    cmp al, byte ptr [ebp + edi]
    jnz @find_name                      ; mismatch: flags select the half to keep
    inc edi

    test al, al
    jnz @B                              ; loop until the terminating NUL matched
@name_found:
    ; edx - 4 - table start = byte offset of the matched name pointer
    sub edx, dword ptr [dwAddrExportAddresses]
    mov eax, dword ptr [dwAddrNameOrdinals]

    sub edx, 4
    add eax, ebp                        ; eax -> name ordinal table

    shr edx, 1                          ; DWORD offset -> WORD offset (index * 2)
    mov edi, dword ptr [dwAddrNameAddresses]
    movzx ecx, word ptr [edx + eax]     ; get ordinal value (index into address table)

    ; get proc address (relative)
    mov ecx, [edi + ecx*4]
@check_forwarder:
    ; Checks for forwarder
    ; ex: NTDLL.RtlAllocateHeap
    lea eax, [ebp + ecx]                ; normalize proc address
    ; subtract export table address from proc address
    sub ecx, dword ptr [dwAddrExportTable]

    ; An RVA inside [export table, export table + size) is a forwarder string.
    ; RVAs below the table wrap to a huge unsigned value and are also "outside".
    cmp ecx, dword ptr [dwDataSize]
    jae @got_address                    ; outside the export table: real code address

    ; eax -> "DLLNAME.Function" or "DLLNAME.#ordinal"; scan for the period
    xor ecx, ecx
@@: movzx edi, byte ptr [eax + ecx]
    inc ecx
    cmp edi, "."                        ; see if this was the period
    jne @B

    lea esi, [eax + ecx]                ; esi -> text after the period

    dec ecx
    mov edx, ecx                        ; is the same as length of library name
    and ecx, 0FFFFFFFCh                 ; ignore partial DWORD's
    lea ebx, dword ptr [ecx + 4]        ; ebx = number of bytes pushed below

    ; copy the library name to the stack, highest DWORD first
@@: push [eax + ecx]                    ; push part of text string to stack
    sub ecx, 4                          ; decrease counter
    jnc @B                              ; jump if there are more dwords to go

    mov byte ptr [esp + edx], 0         ; overwrite the period with the terminating NUL

    fncall GetModuleHandleA, esp        ; get starting offset of the redirected library
    test eax, eax
    jz @load_library                    ; not loaded yet (temp string still on stack)

    add esp, ebx                        ; clear temp string
@find_new:
    mov ebp, eax                        ; get HMODULE of the new DLL

    cmp byte ptr [esi], '#'
    mov [lpProcName], esi               ; set the forwarded proc name (mov keeps the flags)
    jne @get_proc

    ; resolves forwarder's ordinal number
    ; ex: NTDLL.#15
    ; eax = eax*10 + digit; the loop continues while the following char is >= '0'
    xor eax, eax
@@: inc esi
    lea eax, [eax + eax*4]
    movzx ecx, byte ptr [esi]
    cmp byte ptr [esi + 1], 30h
    lea eax, [ecx + eax*2 - 30h]
    jns @B

    mov esi, eax                        ; esi = ordinal, handled by @ordinal
    jmp @get_proc

@ordinal:
    ; get function offset table (relative)
    mov eax, dword ptr [edx.IMAGE_EXPORT_DIRECTORY.AddressOfFunctions]

    ; ordinal base must be subtracted from given ordinals
    ; before indexing the export address table, but not
    ; from values in the ordinal table, which runs parallel
    ; to the name rva table

    ; subtract ordinal base -> zero-based index into the export address table
    sub esi, dword ptr [edx.IMAGE_EXPORT_DIRECTORY.nBase]
    add eax, ebp                        ; normalize offset

    ; The index, not the raw ordinal, must be below NumberOfFunctions.
    ; Ordinals below the base wrap to a huge unsigned value and are rejected too.
    cmp esi, dword ptr [edx.IMAGE_EXPORT_DIRECTORY.NumberOfFunctions]
    jae @not_found

    mov ecx, dword ptr [eax + esi*4]    ; get the address of proc (relative)
    jmp @check_forwarder

@load_library:
    fncall LoadLibraryA, esp            ; library name is still on the stack
    add esp, ebx                        ; clear temp string
    test eax, eax
    jnz @find_new
@not_found:
    xor eax, eax
@got_address:
    ret
endf GetProcOffset


Here's the program to benchmark it against the GetProcAddress: http://personal.inet.fi/atk/partsu/GetProcSpeed.zip
I have also included the old version, just to see if my optimizations were worth it.  :P
You can either specify the function name, or the ordinal number with a leading '#' (#15).


EDIT: Added forwarded ordinal support.

hutch--

Thanks for reposting this thread, it is an interesting one and I don't know what happened last time.
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

Petroizki

All right, here is one way of getting all the proc addresses of one module at a time. The function is called GetProcArray, and its inner loop is actually the same as in the code posted above, but it should be handy for getting the addresses without multiple calls. :wink

The first parameter is the handle to the module, just like in the default GetProcAddress.
The second parameter is a bit more complicated, it is the array which will receive all the addresses, but it's also the initialization array.

Before the call, initialization/result array must contain either addresses of proc names or ordinal values. After the call to the GetProcArray, the initialization values are replaced by the addresses of the specified procs.

This is what the initialization looks like; there are 5 procedures whose addresses we want to resolve:
.data
; Names of the procedures to resolve; the table below points at them
STRING_PROC0            db "IcmpCreateFile", 0
STRING_PROC1            db "IcmpSendEcho", 0
STRING_PROC2            db "IcmpCloseHandle", 0
STRING_PROC3            db "IcmpParseReplies", 0

ALIGN 4                                         ; DWORD-align the table
; In/out table handed to GetProcArray.
;   before the call: each DWORD is a name pointer, or an ordinal whose
;                    upper WORD is zero
;   after the call:  each DWORD is the address of the resolved procedure
; A zero DWORD terminates the table.
g_pfnIcmpCreateFile     dd OFFSET STRING_PROC0  ; -> IcmpCreateFile
g_pfnIcmpSendEcho       dd OFFSET STRING_PROC1  ; -> IcmpSendEcho
g_pfnIcmpCloseHandle    dd OFFSET STRING_PROC2  ; -> IcmpCloseHandle
g_pfnIcmpParseReplies   dd OFFSET STRING_PROC3  ; -> IcmpParseReplies
g_pfnSomeOrdinal        dd 8                    ; -> export with ordinal 8
                        dd 0                    ; end-of-table marker (required)

So, at the time of initialization, 'g_pfnIcmpCreateFile' is actually the address of the null-terminated string "IcmpCreateFile", but after the call to GetProcArray it will contain the address of that proc. You need to pass the starting point of the array to GetProcArray, and it will then fill the array with function addresses. Remember that the last DWORD in the array must be 0, so that the proc search ends.

.code
; Resolve every entry of the table that starts at g_pfnIcmpCreateFile in one
; call. Module and the label 'failed' are supplied by the surrounding program
; (not shown). Per the post, GetProcArray returns 0 as soon as one procedure
; cannot be found and non-zero otherwise.
invoke GetProcArray, Module, ADDR g_pfnIcmpCreateFile ; get addresses
test eax, eax
jz failed                            ; at least one entry was not resolved

; initialization array has been replaced with addresses

; the table slot now holds a code address, so call through it indirectly
call dword ptr [g_pfnIcmpCreateFile] ; will call the IcmpCreateFile


If the function fails to find any proc it will stop the search and return 0, otherwise it returns non-zero.

EDIT: I made a library file, in case anyone wants to try these two functions without my macros: http://personal.inet.fi/atk/partsu/GetProcLib.zip

Function prototypes are simple, the GetProcOffset is just like GetProcAddress API.
GetProcArray takes the handle parameter, and the offset of the initialization/result array.
   GetProcOffset proto :DWORD, :DWORD
   GetProcArray proto :DWORD, :DWORD

James Ladd

Petroizki ,
I think this is great.
However, would it be possible to do a GetProcArray where I dont have to specify ordinals or names?
Maybe the caller doesnt know what ordinals or names there are.
rgs, striker

Petroizki

striker,

I don't quite understand, without ordinals or names it's impossible to find any proc addresses. Or do you just want a function that would retrieve information about the library exports (amount, address to name table)?

liquidsilver

I'm glad that this is back, I wondered where the first one disappeared to.

Nice work, I use this function in all my programs that need it. :clap:

I found that it ran +-90% faster than the GetProcAddress on the first run and then the GetProcAddress sped up after each call. Eventually it was only slightly slower (+-5%) This make me think M$ has some trickery going on there :naughty:, but then again, nothing by M$ suprises me anymore :bg

James Ladd

Petroizki ,
What I am suggesting is that if you have a name table, then just return the corresponding addresses in the order they appear.
If the def file had:

EXPORTS
    myapi1 @ 0
    myOtherApi @ 1
    myAdditionalApi @ 2


and I called your new API without any arguments other than the module handle (hinst) then
i would like to get back an array of 3 DWORD entries containing the addresses of each of the
functions that were exported.

If this required that all exports had ordinals and they were numbered from 0 through 'n' then
that would be fine.

I do hope this is clearer


x86asm

HEre is the results from the PC @ my Uni:

"Intel (R) Pentium (R) Processor"
GetProcAddress: 1420
GetProcOffset: 376
GetPRocOffset OLD: 496


When I get home today I will test on my Athlon XP 1900+.

Its a 2.53Ghz P4 CPU.

db90h

#8
Hi guys,

I authored a similar custom GetProcAddress as a plug-in for PECompact2 so that I could get around win9x's disallowing of ordinal imports of kernel32 through kernel32.GetProcAddress. For those who don't know, M$ decided it would not allow anyone to import by ordinal from kernel32 via GetProcAddress in win9x. I wrote my algorithm to use a binary search, with forwarders supports, and even use hints if they are available in the import descriptors. My results were similar to those that you guys have discovered -- I could not substantially outperform kernel32.GetProcAddress when benchmarked with several hundred imports at a time. Apparently M$'s GetProcAddress speeds up after the first run, causing me to believe that it is likely memory access time that is the bottleneck after the code is fully in the cpu cache. This means that perhaps there is no code that can speed things up substantially beyond what has already been done -- only less memory accesses, perhaps.. but I don't know how that could be accomplished.

I never got around to doing much optimization, so my version is likely bigger and less optimal than the one already posted. In other words, my code sucks, so there's no need to tell me that, I already know<g>. But here it is anyway<g>.. I think this thread, if anything, discourages me from further optimizations ;). However, I do suggest the author allow for hints to be used, since they often can take the algorithm directly to the correct member of the export names table.

Here is a summation of the code capabilities:

+ binary seach
+ hints used (initial mid)
+ forwarded names and ordinals supported.

Latez ;)
db90h


;
; --- MYSTRCMP: byte-wise compare of two NUL-terminated strings
;
; In:    esi -> first string, edi -> second string
; Out:   eax =  0  strings are equal
;        eax =  1  first differing byte of [esi] is above that of [edi]
;        eax = -1  first differing byte of [esi] is below that of [edi]
; Clobb: eax, esi, edi (both advanced past the last byte examined), flags

MYSTRCMP MACRO
    LOCAL next_byte
    LOCAL is_equal
    LOCAL is_above
    LOCAL is_below
    LOCAL finished

  next_byte:
    mov al, byte ptr [esi]
    mov ah, byte ptr [edi]
    inc esi
    inc edi
    cmp al, ah
    ja  is_above                ; unsigned compare decides the ordering
    jb  is_below
    test al, al                 ; bytes are equal: was it the terminator?
    jnz next_byte
  is_equal:
    xor eax, eax
    jmp finished
  is_above:
    mov eax, 1
    jmp finished
  is_below:
    mov eax, -1
  finished:
ENDM

;-------------------------------------------
;
; GetProcAddressWithHintHook
;
;  GetProcAddress replacement that
;    - uses the import hint as the first probe of the search
;    - does a binary search of the export name pointer table
;    - handles forwarded exports (NTDLL.Name, NTDLL.#123)
;
;  Parameters:
;    hModule - base address of the module to search
;    pszApi  - pointer to the API name, or an ordinal (value <= 0FFFFh)
;    nHint   - index into the name pointer table to try first; 0 = no hint
;  Returns:
;    eax = address of the API, or 0 if it was not found
;
;  GET_DELTA, g_pGetModuleHandleA, LoadLibraryAHook and the
;  IMAGE_NT_HEADER / ExportDirectory structures are defined elsewhere.
;
;  NOTE(review): the forwarder DLL name is copied into the 256-byte szTemp1
;  without a length check, and the ordinal index is not range-checked
;  against the number of exported functions - confirm inputs are trusted.
;
GetProcAddressWithHintHook proc uses ebx edi esi hModule:DWORD, pszApi:DWORD, nHint:DWORD
    LOCAL nStartIndex:DWORD             ; first index of the search window
    LOCAL nExportSize:DWORD             ; size of the export directory data
    LOCAL szTemp1[256]:BYTE             ; DLL name of a forwarded export

    GET_DELTA ebx                       ; ebx is later used to index g_pGetModuleHandleA

    ; get pointer to PE header
    mov eax,hModule
    add eax,([eax]+IMAGE_DOS_HEADER.e_lfanew)

    ; exports is first data directory
    lea edi,([eax]+IMAGE_NT_HEADER.DataDirectories)
    mov esi,[edi]                       ; RVA of the export directory
    mov eax,[edi+4]                     ; its size
    mov nExportSize,eax
    test esi,esi
    jz NotFound                         ; module has no export directory
    add esi,hModule
    ; esi-> export directory

    mov ecx,pszApi
    cmp ecx,0ffffh
    ja ImportByName                     ; values above 0FFFFh are name pointers

  ;----------------------------------------------
  ;
  ; ImportByOrdinal
  ;  ordinal in ecx
  ;
  ImportByOrdinal:
    sub ecx,([esi]+ExportDirectory.Base)
    mov edi,([esi]+ExportDirectory.AddressOfFunctions)
    add edi,hModule
    shl ecx,2   ; *4
    mov eax,[edi+ecx]
    add eax,hModule

    ;
    ; see if this is a forwarded export
    ; check if in export section (COFF spec)
    ;
    cmp eax,esi
    jb NotForwarder
    add esi,nExportSize                 ; esi = end of export data (no longer the directory)
    cmp eax,esi
    ja NotForwarder

    ;
    ; format:
    ;   NTDLL.FuncName
    ;   NTDLL.#1
    ;
    ; find '.'

    lea edi,szTemp1
    ; edi->DLL name

    ; copy the DLL name, up to and including the '.', into szTemp1
  FindDotLoop:
    mov cl,byte ptr [eax]
    mov byte ptr [edi],cl
    inc eax
    cmp cl,'.'
    jz FoundDot
    inc edi
    jmp FindDotLoop

  FoundDot:

    mov byte ptr [edi],0                ; replace the '.' with the terminator
    lea edi,szTemp1

    ; edi->DLL name
    ; eax->API or ord#
    mov cl,byte ptr [eax]
    cmp cl,'#'
    jnz IsNamedForwarder
    inc eax                             ; skip the '#'
    push eax
    call AsciiDecimalToDword   ; todo: make macro, not function.. lewser ;p
  IsNamedForwarder:
    mov esi,eax                         ; esi = forwarded name pointer or ordinal
  ImportIt:
    push edi
    call g_pGetModuleHandleA[ebx]
    test eax,eax
    jnz GotMod                          ; already loaded: use that handle
    push edi
    call LoadLibraryAHook               ; not loaded yet: load it now
  GotMod:
    ; eax=module
    test eax,eax
    jz NotFound

    ; resolve the forwarded export in the target module (no hint);
    ; the result in eax falls through to the ret below
    push 0
    push esi
    push eax
    call GetProcAddressWithHintHook

  NotForwarder:
    ; eax= api address
    ret

  ;--------------------------------------------------------------
  ;
  ; ImportByHint  (index into Name Pointer Table)
  ;
  ;  Hint in eax
  ;
  ImportByHint:
    mov edi,([esi]+ExportDirectory.AddressOfNameOrdinals)
    add edi,hModule

    ;shl eax,1
    ;xor ecx,ecx
    ;mov cx,[edi+eax]
    movzx ecx,word ptr [edi+eax*2] ;; jibz suggestion 8-2-4
    ; ImportByOrdinal subtracts Base again, so add it here first
    mov edx,([esi]+ExportDirectory.Base)
    add ecx, edx

    jmp ImportByOrdinal

  ;-------------------------------------------------------------
  ;
  ; Begin Import by name
  ;
  ImportByName:

    ;
    ; do binary search using hint as initial mid
    ;
    mov edi,([esi]+ExportDirectory.AddressOfNames)
    add edi,hModule

    ; edi->Names Array
    ; ecx=current index
    ;
    mov nStartIndex,0
    mov edx,([esi]+ExportDirectory.NumberOfNames)
    test edx,edx
    jz NotFound                         ; no named exports: nothing to search
    dec edx
    ; edx holds end index

    xor eax,eax
    add eax,nHint
    jz BinarySearch                     ; hint 0 means "no hint"
    cmp edx,eax
    jb BinarySearch  ;; hint out of bounds, ignore it

    ;
    ; hint is available so use it as initial mid
    ;
    mov ecx,eax
    jmp StartBinarySearchWithHint

  BinarySearch:
    mov ecx,edx
    mov eax,nStartIndex
    ; ecx=last
    ; eax=first
    ; signed compare: last becomes -1 when the name sorts before entry 0
    cmp eax,ecx
    jg NotFound
    add ecx,eax
    shr ecx,1  ; /2
    ;ecx=mid ...
  StartBinarySearchWithHint:

    mov eax,[edi+(ecx*4)]               ; RVA of the name at index ecx
    add eax,hModule

    ; MYSTRCMP needs esi/edi and clobbers eax; keep the search state
    push ecx
    push edx
    push edi
    push esi

    mov edi,pszApi                      ; edi = sought name
    mov esi,eax                         ; esi = current export name
    MYSTRCMP

    pop esi
    pop edi
    pop edx
    pop ecx

    ;
    ; if eax > 0 .. then current api > sought api
    ; if eax < 0 ... then current api < sought api
    ;

    cmp eax,0
    jz FoundMatch
    jg GreaterThan
    ;
    ; LessThan
    ;  - discard lower half
    inc ecx
    mov nStartIndex,ecx
    jmp BinarySearch

  GreaterThan:
    ;  - discard upper half (edx may become -1, caught in BinarySearch)
    dec ecx
    mov edx,ecx
    jmp BinarySearch

  FoundMatch:
    ;
    ; ecx=index into names/ordinals array
    ;
    mov eax,ecx
    jmp ImportByHint

  NotFound:
    xor eax,eax
    ret
GetProcAddressWithHintHook endp


; DWORD AsciiDecimalToDword(char *string);
;
; Converts a NUL-terminated string of decimal digits to its value.
; Every byte before the terminator is folded in as (byte AND 0Fh);
; characters are not checked for being digits.
; Out: eax = accumulated value (0 for an empty string)
AsciiDecimalToDword proc uses ebx edi esi src:DWORD
    mov   esi, src
    xor   eax, eax                      ; running total
  next_digit:
    movzx ebx, byte ptr [esi]           ; ebx = next character
    inc   esi
    test  ebx, ebx
    jz    conversion_done               ; terminator reached
    and   ebx, 0Fh                      ; low nibble = digit value
    lea   eax, [eax + eax*4]            ; total * 5
    lea   eax, [ebx + eax*2]            ; total * 10 + digit
    jmp   next_digit
  conversion_done:
    ret
AsciiDecimalToDword endp

x86asm

The program crashes immediately on my home machine, I'm running Windows 2000 SP4.
The instruction at "0x77e189ce" referenced memory at "0x6874756c", the memory could not be "read"
Unhandled exception in speeder.exe (USER32.DLL): 0xC0000005 Access Violation

Offending instruction on my PC is: test        byte ptr [ebx+2Bh],0C0h
at address 77E189CE in USER32.DLL

Petroizki

striker,

So you want _all_ exports from the module in one call?

db90h,

thanks, your code seems interesting, and the hint stuff is something i haven't heard of before.

x86asm,

Thanks for testing it.
Did the program crash immediatily when you executed it? Or did it crash after you pushed the benchmark button (if so, what was the library and proc name you searched?)?
The program is quite small; could you by any chance step through it with a debugger? I suspect it crashes in SetDlgItemTextA after getting the CPU vendor name with cpuid; the name is probably not null-terminated.

Petroizki

I'm not totally sure about the hint value handling, what if the hint value is too small, then the proc can be easily missed, should i in such case run the search again without the hint? Also i think i have to check that the hint value is not too large?

I added the forwarded ordinal check, and updated all the code in the first post and in my website. :8)

db90h

Hi,

Hopefully I have understood your query here ...

Using the name hint as the initial Mid in the binary search (without making it actual middle point by adjusting the start or last index) should allow you to then continue with the algorithm just fine. If the sought API name is less than mid, then you can discard all items >=mid, if it is greater than mid you can discard all items <=mid, and of course if it is =Mid then you have found your API. Mid doesn't actually have to be truly in the middle of your data range for the algorithm to work. The only check you should have to do before jumping right into the binary search is to make sure the hint isn't greater than the maximum index in the export names table.

This is what my algorithm does, it simply sets initial mid index (aka first compared index in array) to the hint value and goes on with its business. I believe that is how the hint value is meant to be used.

Have you done some research on what the hints are? Regardless, for others reading who may not know: They are created by the linker at build time and prefix the API names in the import table. They of course represent the location of the API name in the export names table of the DLL on the system the linker is running on. The MS loader uses them to speed up processing of the import table. For more information, read PE docs.

Cool that u added the forwarded import support ;). Your code is very well done.

db90h

db90h

Hi again,

Looking at your code, I think I see why you were worred that a hint value too low could cause you to miss the desired API. It looks like your binary search only goes one direction .. it seem to start at the end of the array and divide by 2 until you hit the API you are looking for. I don't understand how this could work, you would miss many APIs, so I have surely mis-read your code. Still, I see no < or > comparison, so something seems at least quite different from my interpretation of a binary search.

I dunno.. please set me straight on how your binary search functions ;).

db90h

Petroizki

db90h,

Thanks for your help, and i actually decided to check the hint first, and then start the original loop from the beginning if the hint was wrong. I will optimize the function, and then probably publish it as GetProcOffsetEx.. :bg

It does not miss any API's, keep the eye on the 'ebx' register on the binary search loop.  :wink

EDIT: I'm not sure about the correct usage of the hint value, but my PE documentation says this: "A match is attempted first with this value. If it fails, a binary search is performed on the DLL's Export Name Pointer Table". And this is easier for me too: if the hint is wrong, then it's better to start the default loop.