In order for BinSearch to work correctly, u have to use it like this :}
mov found_offset, 0
@@:
...
invoke BinSearch, found_offset, pointer_of_data_to_look_in, data1_len+1, pointer_of_data_to_look_for, data2_len+1
...
add eax, data2_len
mov found_offset, eax
jmp @B
otherwise it will find one-byte-less-pattern & not sure what it finds if there is only one-byte-pattern :D (pseudo-random value my guess)
Thanks. :U
Will have a look when I get time.
Here is the code that illustrates the problem & output (a little sarcastic, couldn't resist :green)
Lets find 'grand', shouldn't be that hard right ?
Data1: grangrandgrannygrant
Data2: grand
Is it grang ?
Is it grann ?
Is it grant ?
Powered by BinSearch woohoo!
Press any key to continue ...
sadly but today I've found another more severe bug in BinSearch & it will cause application to crash :(
1. it only occured using memory mapped files (maybe because of the (low) address that it maps file to)
2. and only when the pattern is not found
2. and only if starting byte of the pattern was 00h
3. and only if the last (sizeof data_to_look_for) bytes of data_to_look_in were 00h
4. it seems the end of search is then undefined :S & causes to go outside the data boundaries & crash
(you may think it's a rare scenario but it happens :S)
I tried to look at BinSearch code & understand how it happened, but man, it's a little cryptic may take a few days :S to grasp
Anyway, below is attached code with reproduced error.
It chokes at 00401203 with ebp=00340FFF, edx=2, at the end of a memory block ranging from 340000 to 341000.
Where is the source, by the way? I can find all kind of variants in \masm32\m32lib except the plain BinSearch...
CPU Disasm
Address Hex dump Command Comments
004011F6 |. /E9 B8000000 jmp 004012B3
004011FB | |90 nop
004011FC |> |8D2C31 lea ebp, [esi+ecx]
004011FF |. |8B5424 24 mov edx, [esp+24]
00401203 |> |0FB65C2A FF /movzx ebx, byte ptr [ebp+edx-1]
00401208 |. |3A5C3A FF |cmp bl, [edi+edx-1]
0040120C |. |0F85 9E000000 |jne 004012B0
it's here \masm32\m32lib\bsearch.asm
OK, found:
Quote align 4
Pre_Match:
lea ebp, [esi+ecx] ; put current scan address in EBP
mov edx, [esp+20+16] ; put pattern length into EDX
Test_Match:
REPEAT 7
movzx ebx, BYTE PTR [ebp+edx-1] ; load last byte of pattern length in main string
cmp bl, [edi+edx-1] ; compare it with last byte in pattern
jne Pre_Scan ; exit loop on mismatch
sub edx, 1
jz Match
ENDM
:bg
This is the first test piece. Searches for a pattern that exactly matches the last 99 bytes of the test file, my version of win32.hlp. Now apart from the smartarse wisecracks it would be a lot more use if I knew what the problem was supposed to be. This finds the right offset from the beginning of the file noting that the file is loaded into legally readable memory.
IF 0 ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
Build this template with "CONSOLE ASSEMBLE AND LINK"
ENDIF ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
include \masm32\include\masm32rt.inc
BinSearchx PROTO :DWORD,:DWORD,:DWORD,:DWORD,:DWORD
.data
pattn \
db 00h,00h,00h,0F0h,0FBh,0FFh,00h,0A4h,0A0h,0A0h,00h,80h,80h,80h,00h,00h
db 00h,0FFh,00h,00h,0FFh,00h,00h,00h,0FFh,0FFh,00h,0FFh,00h,00h,00h,0FFh
db 00h,0FFh,00h,0FFh,0FFh,00h,00h,0FFh,0FFh,0FFh,00h,04h,0FFh,0FFh,01h,00h
db 0FFh,0FFh,3Ah,0FFh,0FCh,0D1h,09h,20h,00h,0FFh,0FCh,08h,20h,65h,07h,00h
db 11h,10h,77h,0Fh,10h,11h,10h,07h,30h,0FCh,07h,60h,17h,0A0h,37h,80h
db 00h,00h,0FFh,0FCh,0FFh,0FFh,0FFh,0FFh,0FFh,00h,00h,0FFh,0FFh,0FFh,0FFh,0FFh
db 0FFh,0FFh,00h
; pattn = 99 bytes
ppatn dd pattn
.code
start:
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
call main
inkey
exit
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
main proc
LOCAL hMem :DWORD
LOCAL flen :DWORD
mov hMem, InputFile("win32.hlp")
mov flen, ecx
invoke BinSearchx,0,hMem,flen,ppatn,99
print str$(eax),13,10
free hMem
ret
main endp
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
align 16
BinSearchx proc startpos:DWORD,lpSource:DWORD,sLen:DWORD,
lpPattern:DWORD,pLen:DWORD
push ebx
push esi
push edi
push ebp
; ----------------
; setup loop code
; ----------------
mov esi, [esp+8+16] ; lpSource
mov edi, [esp+16+16] ; lpPattern
mov al, [edi] ; get 1st char in pattern
mov ecx, [esp+12+16] ; sLen
sub ecx, [esp+20+16] ; pLen ; sub pattern len to avoid searching past end of src
add ecx, 1
add esi, ecx ; add source length
neg ecx ; invert sign
add ecx, [esp+4+16] ; startpos ; add starting offset
sub DWORD PTR [esp+20+16], 1
jmp Scan_Loop
; ----------------------------------------
align 4
Pre_Match:
lea ebp, [esi+ecx] ; put current scan address in EBP
mov edx, [esp+20+16] ; put pattern length into EDX
Test_Match:
REPEAT 7
movzx ebx, BYTE PTR [ebp+edx-1] ; load last byte of pattern length in main string
cmp bl, [edi+edx-1] ; compare it with last byte in pattern
jne Pre_Scan ; exit loop on mismatch
sub edx, 1
jz Match
ENDM
movzx ebx, BYTE PTR [ebp+edx-1] ; load last byte of pattern length in main string
cmp bl, [edi+edx-1] ; compare it with last byte in pattern
jne Pre_Scan ; exit loop on mismatch
sub edx, 1
jnz Test_Match
jmp Match
align 4
Pre_Scan:
add ecx, 1 ; start on next byte
Scan_Loop:
REPEAT 7
cmp al, [esi+ecx] ; scan for 1st byte of pattern
je Pre_Match ; test if it matches
add ecx, 1
jns No_Match ; exit on sign inversion
ENDM
cmp al, [esi+ecx] ; scan for 1st byte of pattern
je Pre_Match ; test if it matches
add ecx, 1
js Scan_Loop ; exit on sign inversion
; ----------------------------------------
No_Match: ; fall through here on no match
mov eax, -1
jmp isOut
Match:
add ecx, [esp+12+16]
sub ecx, [esp+20+16]
mov eax, ecx
isOut:
pop ebp
pop edi
pop esi
pop ebx
ret 20
BinSearchx endp
OPTION PROLOGUE:PrologueDef
OPTION EPILOGUE:EpilogueDef
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
end start
Now I can get this result so far.
With this test data,
text db "Beware of t*e ides of March",0
ptxt dd text
; text = 27 bytes
ptn1 db "March"
ptn2 db "arch"
ptn3 db "rch"
ptn4 db "ch"
ptn5 db "h"
pt1 dd ptn1
pt2 dd ptn2
pt3 dd ptn3
pt4 dd ptn4
pt5 dd ptn5
If I run this code,
invoke BinSearchx,0,ptxt,27,pt1,5
print str$(eax),13,10
invoke BinSearchx,0,ptxt,27,pt2,4
print str$(eax),13,10
invoke BinSearchx,0,ptxt,27,pt3,3
print str$(eax),13,10
invoke BinSearchx,0,ptxt,27,pt4,2
print str$(eax),13,10
invoke BinSearchx,0,ptxt,27,pt5,1
print str$(eax),13,10
I get this result.
22
23
24
25
-1
Press any key to continue ...
It does not match a single character. It was not a major consideration in that pattern matching is not usually done on single characters.
Now if I tweak the algo here,
align 4
Pre_Match:
lea ebp, [esi+ecx] ; put current scan address in EBP
mov edx, [esp+20+16] ; put pattern length into EDX
add edx, 1 ; This has been added !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
I get this result.
22
23
24
25
26
Press any key to continue ...
Quote from: hutch-- on October 21, 2010, 11:38:11 PM
Now apart from the smartarse wisecracks it would be a lot more use if I knew what the problem was supposed to be. This finds the right offset from the beginning of the file noting that the file is loaded into legally readable memory.
Can we conclude from this that the test provided by the smartarse wisecracks does not cause an access violation on your machine?
With a report that turns up 4 years after I wrote it, the smartarse wisecracks don't contribute anything to fixing it. It appears that it was not searching for the last character in the pattern and so far the tweak seems to deliver the correct results. RE: memory range problems, I would imagine that working in legally readable memory would solve that.
OK I'm trying to be as friendly as I can (and not take offense :()
I think I know what causes BinSearch to read past valid address
Let's consider this scenario:
source data (32 bytes):
10 10 10 10 10 10 10 10
10 10 10 10 10 10 10 10
10 10 10 10 10 10 10 10
10 10 10 10 10 10 00 00
pattern data (2 bytes):
00 6A
It compares first 30 bytes of source data against '00' and nothing bad is happening
But things get ugly when it finds first byte match, u see..
Scan_Loop:
REPEAT 7
cmp al, [esi+ecx] ; scan for 1st byte of pattern
je Pre_Match ; test if it matches
add ecx, 1
jns No_Match ; exit on sign inversion
ENDM
The end of search is determined by signed addition, if ecx turns from FFFFFFFF to 0 and finally to 1 it's over, but the problem is that
add ecx, 1
jns No_Match ; exit on sign inversion
code-block is never accessed, the algo keeps finding first byte match '00'. The case with memory mapped files is worse because mapped files usually are 4096 bytes zero aligned.
So simple solution would be: bring check of not signed addition in front
; version 3
Scan_Loop:
REPEAT 7
add ecx, 1
jns No_Match ; exit on sign inversion
sub ecx, 1
cmp al, [esi+ecx] ; scan for 1st byte of pattern
je Pre_Match ; test if it matches
ENDM
add ecx, 1
cmp al, [esi+ecx] ; scan for 1st byte of pattern
...
can someone post a fully fixed version, I always knew something was off with binsearch when it failed to match patterns, was my fault to assume was an error on my part instead of the alg. I use this to find individual characters and tags like <a> </a> but it failed me too often.
I can duplicate the problem with ramguru's data set and rather than waste the time fixing this version I will do another one when I get a bit more time. I have some of it fixed but finally I wrote the algo over 4 years ago and I don't write that style of algo in the manner any longer.