News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Rewrite of binsearch algo.

Started by hutch--, November 05, 2010, 12:54:16 PM

Previous topic - Next topic

hutch--

This version seems to be test up OK here at the moment. The test piece runs a set of tests that appear to be working. The algo still has a stack frame and has not been unrolled yet but its easier to work on in this form and do the normal optimisations when its tested more fully.


IF 0  ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
                      Build this template with "CONSOLE ASSEMBLE AND LINK"
ENDIF ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    include \masm32\include\masm32rt.inc

    bsearch PROTO :DWORD,:DWORD,:DWORD,:DWORD,:DWORD

    .data
      alphab db "abcdefghijklmnopqrstuvwxyz0",0
      alpha dd alphab

      longpattern db "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",0
      longp dd longpattern


    .code

start:
   
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    call main
    inkey
    exit

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

main proc

    fn bsearch,0,alpha,26,"abc",3
    print str$(eax)," OFFSET lead bytes",13,10

    fn bsearch,0,alpha,26,"xyz",3
    print str$(eax)," OFFSET trail bytes",13,10

    fn bsearch,0,alpha,26,"xxx",3
    print str$(eax)," no match test",13,10

    fn bsearch,0,alpha,26,"xyz0",4
    print str$(eax)," end overrun test",13,10

    fn bsearch,0,alpha,26,"xy0",3
    print str$(eax)," end underrun test",13,10

    fn bsearch,0,alpha,26,longp,52
    print str$(eax)," pattern too long test",13,10

    fn bsearch,64,alpha,26,"bcde",4
    print str$(eax)," start out of range test",13,10

    ret

main endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

bsearch proc stpos:DWORD,src:DWORD,slen:DWORD,patn:DWORD,plen:DWORD

  ; ---------------------------------------------------------
  ; ARGUMENTS
  ; ---------
  ; 1. stpos        offset from src start address
  ; 2. src          start address of data to search
  ; 3. slen         1 based length of src
  ; 4. patn         address of pattern to search for
  ; 5. plen         1 based length of pattern

  ; RETURN VALUES
  ; -------------
  ; 0 or greater    OFFSET of matched pattern
  ; -1              pattern not found
  ; -2              pattern > source length
  ; -3              start OFFSET > last valid search position
  ; ---------------------------------------------------------

    LOCAL epos  :DWORD

    push ebx
    push esi
    push edi

    mov eax, slen                   ; load source length
    sub eax, plen                   ; calculate last search position
    js errquit1                     ; if patn > src length
    cmp stpos, eax
    jg errquit2                     ; if startpos > last sch position

    mov epos, eax                   ; store last valid sch position

    mov esi, src                    ; load the source address in ESI
    mov edi, patn                   ; load the pattern address in EDI
    movzx ebx, BYTE PTR [edi]       ; get the 1st byte of patn

    add epos, esi                   ; add ESI to it for exit count in ESI
    sub epos, 1                     ; correct for zero based scanloop
    sub plen, 1                     ; correct for zero based match loop counter
    sub esi, 1
    jmp scanloop

  scanloop:
    add esi, 1
    cmp [esi], bl                   ; cmp current byte to patn 1st byte
    je matchloop                    ; branch to match loop
    cmp esi, epos                   ; cmp ESI to last valid search position
    jle scanloop                    ; loop back if less than or equal

    jmp nomatch

  matchloop:
    or edx, -1                      ; use edx as the index for matching
  mloop:
    add edx, 1
    movzx eax, BYTE PTR [esi+edx]
    cmp al, [edi+edx]
    jne scanloop                    ; exit on mismatch within valid range
    cmp edx, plen
    jne mloop

    sub esi, src
    mov eax, esi
    jmp quit

  nomatch:
    mov eax, -1
    jmp quit

  errquit1:
    mov eax, -2
    jmp quit

  errquit2:
    mov eax, -3
    jmp quit

  quit:
    pop edi
    pop esi
    pop ebx

    ret

bsearch endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

end start
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

dedndave

couldn't these be combined into a single arg ?
  ; 1. stpos        offset from src start address
  ; 2. src          start address of data to search

i am thinking address, length, address, length
i don't get it - isn't the first time - won't be the last   :lol

hutch--

Dave,

Specifying the start offset allows you to loop through a source which has more than 1 pattern match. If you find a match at offset 1234 you do the next search from offset 1235. In the leading calculations the difference between the start offset and the start of the source is used to determine if the pattern still fits into the remaining bytes.

I think you could write an algo that worked both as input and output as addresses but this one returns an offset from the source, not an absolute address.
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

ToutEnMasm


If you rewrite it perhaps you can add a text search not case sensitive as this one:

Quote
;arguments
;start position ,Base 1
;@ of chain where the search is done
;@ of chain to search
;0 or 1,mean 0=Binary search,1 ascii search with no case
;return eax ,offset base 1  (start pointer + offset - 1= adress found)

ChercherChaine proc Debut:DWORD,Contenant:DWORD,Chercher:DWORD,Methode:DWORD

   Local LongCont       :DWORD
   Local PointCont      :DWORD
   Local LongCherche    :DWORD
   Local DeplaceMax     :DWORD
   local retour         :DWORD
   Pushad 
   mov retour,0   ;par défaut échec
   mov eax,Debut

   .if eax < 1
      mov retour,0
      jmp FinChercherChaine
   .endif

   invoke lnstr,Contenant

   .if eax == -1
      mov retour,-1
      jmp FinChercherChaine      
   .elseif eax < 1
      mov retour,0
      jmp FinChercherChaine
   .endif
   mov LongCont,eax
   
   invoke lnstr,Chercher
   .if eax == -1
      mov retour,-1
      jmp FinChercherChaine      
   .elseif eax < 1
      mov retour,0
      jmp FinChercherChaine
   .endif

   ;la chaine de recherche doit etre <= en taille a l'autre
   mov LongCherche,eax
   mov ecx,LongCont
   cmp eax,ecx
   jle ValidData
      mov retour,0
      jmp FinChercherChaine
ValidData:
   ;la position de depart+long cherche =< long contenant si = deplace =0 ecx,1
   mov eax, Debut        ;relatif a la chaine
   add eax,LongCherche
   dec eax
   .if eax > LongCont
      mov retour,0
      jmp FinChercherChaine
   .endif
   mov ecx,LongCont
   inc ecx           ;sinon -1 en sortie pour zero dep
   sub ecx,eax
   mov DeplaceMax,ecx ;nombre de déplacements possible du pointeur de recherche

   ;Valid data ,begin search

   cld
   PuPo PointCont,Debut
   dec  PointCont       
   mov esi,Contenant
   add esi,Debut           
   dec esi           
   dec esi                 
   mov edi,Chercher       
ProgresseContenant:
   inc esi
   inc PointCont           
   xor ebx,ebx
   xor eax,eax
   xor edx,edx             
ProgresseContenu:
   mov al,byte ptr [esi+ebx]
   .if Methode == 1        ;en majuscules
      .if eax > 96 && eax < 123
         sub al,32
      .endif
   .endif
   mov dl,byte ptr [edi+ebx]
   .if Methode ==1        ;en majuscules
      .if edx > 96 && edx < 123
         sub dl,32
      .endif
   .endif

   inc ebx
   cmp al,dl
   jnz NegatifResult
   cmp ebx,LongCherche
   jnz ProgresseContenu   
   jmp ChaineTrouve
NegatifResult:
   dec DeplaceMax     
   jnz ProgresseContenant
   mov retour,0      
   jmp FinChercherChaine
ChaineTrouve:
   mov eax,PointCont     
   mov retour,eax

FinChercherChaine:
popad
mov eax,retour     
ret
chercherChaine endp


ramguru

the algo doesn't seem to work well (and I mean Hutch's algo)
let's say I have this as a test



    .data
      alphab db "ABCabcABCabcABCabcABCabc",0
      alpha dd alphab

    .code

start:
   
    call main
    inkey
    exit

main proc uses edi

    xor   edi, edi
    print str$(edi)," was OFFSET where search started",13,10
    fn bsearch,edi,alpha,24,"abc",3
    mov   edi, eax
    print str$(eax)," is OFFSET of found substr",13,10
   
    add   edi, 3
    print str$(edi)," was OFFSET where search started",13,10
    fn bsearch,edi,alpha,24,"abc",3
    mov   edi, eax
    print str$(eax)," is OFFSET of found substr",13,10
   
    add   edi, 3
    print str$(edi)," was OFFSET where search started",13,10
    fn bsearch,edi,alpha,24,"abc",3
    mov   edi, eax
    print str$(eax)," is OFFSET of found substr",13,10



.. and the output I get:

0 was OFFSET where search started
3 is OFFSET of found substr
6 was OFFSET where search started
3 is OFFSET of found substr
6 was OFFSET where search started
3 is OFFSET of found substr
Press any key to continue ...


So IT doesn't care about stpos variable

ToutEnMasm


here is what i get
Quote
0 OFFSET lead bytes
23 OFFSET trail bytes
-1 no match test
-1 end overrun test
-1 end underrun test
-2 pattern too long test
-3 start out of range test

ramguru

#6
OK the simple fix would be

    add epos, esi                   ; add ESI to it for exit count in ESI
    sub epos, 1                     ; correct for zero based scanloop
    sub plen, 1                     ; correct for zero based match loop counter
    add esi, stpos


And it works well better then

later>

and again same mistake  :clap:
the algo should check for the end condition before it gets into matchloop or it will go past valid src range

  scanloop:
    add esi, 1
    cmp [esi], bl                   ; cmp current byte to patn 1st byte
    je matchloop                    ; branch to match loop
    cmp esi, epos                   ; cmp ESI to last valid search position
    jle scanloop                    ; loop back if less than or equal
    jmp nomatch                 ; this jump should be taken before going to matchloop

hutch--

Yes you are correct that the start offset has not yet been added to the start address, the tests so far have been to ensure that the offset returned on a match was correct and that it neither overran or underran the match length.
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

hutch--

Here is the later version that increments the optional starting offset to apply. It has a sequence search text and a benchmark added. On this quad its running at about 1.4 gig/sec with the search pattern in the test piece. Interestingly enough it does not respond to either unrolling either loop or aligning the labels so its probably memory bound.

RE: ramguru's comments, it appears you have not understood the leading test code that excludes the condition you have referred to. The idea is to do the exclusion testing before your loop code so you don't have to make a mess of it with additional conditional testing.


IF 0  ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
                      Build this template with "CONSOLE ASSEMBLE AND LINK"
ENDIF ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    include \masm32\include\masm32rt.inc

    bsearch PROTO :DWORD,:DWORD,:DWORD,:DWORD,:DWORD

    .data
      alphab db "abcdefghijklmnopqrstuvwxyz0"
      alpha dd alphab

      longpattern db "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
      longp dd longpattern
      llong dd LENGTHOF longpattern

      repeat_pattern db "bc..abc....abc........abc...abc......abc...abc........ab"
      rpt  dd repeat_pattern
      lrpt dd LENGTHOF repeat_pattern

    .code

start:
   
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    call main
    inkey
    exit

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

main proc

    LOCAL cloc  :DWORD
    LOCAL pmem  :DWORD
    LOCAL pstr  :DWORD

    push ebx
    push esi
    push edi

    fn bsearch,0,alpha,26,"abc",3
    print str$(eax)," OFFSET lead bytes",13,10

    fn bsearch,0,alpha,26,"xyz",3
    print str$(eax)," OFFSET trail bytes",13,10

    fn bsearch,0,alpha,26,"xxx",3
    print str$(eax)," no match test",13,10

    fn bsearch,0,alpha,26,"xyz0",4
    print str$(eax)," end overrun test",13,10

    fn bsearch,0,alpha,26,"xy0",3
    print str$(eax)," end underrun test",13,10

    fn bsearch,0,alpha,26,longp,52
    print str$(eax)," pattern too long test",13,10

    fn bsearch,64,alpha,26,"bcde",4
    print str$(eax)," start out of range test",13,10,13,10

  ; =====================================

    mov cloc, 0

  lpsch:
    mov cloc, rv(bsearch,cloc,rpt,lrpt,"abc",3) ; get OFFSET of next instance of pattern
    cmp cloc, 0
    jl done                                     ; anything below zero is not a match
    print "'abc' at OFFSET "
    print str$(cloc),13,10
    add cloc, 1                                 ; increment
    jmp lpsch

  done:
    print "Thats all folks",13,10,13,10

  ; =====================================

    print "Benchmark algo",13,10,13,10

  ; ------------------------------------------------------------
  ; fill memory with 1 million copies of the following byte data
  ; ------------------------------------------------------------

    sas pstr, "...abc...bc...ab...abc...bc..."  ; 30 bytes

    mov pmem, alloc(1024*1024*30)               ; allocate space for 1 million copies

  ; ---------------------
  ; write the test buffer
  ; ---------------------
    xor ebx, ebx
  stloop:
    mov esi, pstr
    mov edi, pmem
    mov ecx, 30
    rep movsb

    add edi, 30

    add ebx, 1
    cmp ebx, 1024*1024
    jl stloop
  ; ---------------------

    invoke GetTickCount
    push eax

    mov cloc, 0                 ; set current location counter to zero
    mov esi, pmem               ; memory address in ESI
    mov edi, 100                ; loop complete test 100 times

  ; =============================

  oloop:                        ; outer loop

  ; -----------------------------

  timloop:
    mov cloc, rv(bsearch,cloc,pmem,1024*1024*30,"abc",3)
    cmp cloc, 0
    jl timout
    add cloc, 1
    jmp timloop
  timout:

  ; -----------------------------

    sub edi, 1
    jnz oloop

  ; =============================

    invoke GetTickCount
    pop ecx
    sub eax, ecx

    push eax
    print "3 gigabyte scanned in "
    pop eax
    print str$(eax)," ms",13,10

    free pmem

    pop edi
    pop esi
    pop ebx

    ret

main endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

bsearch proc stpos:DWORD,src:DWORD,slen:DWORD,patn:DWORD,plen:DWORD

  ; ---------------------------------------------------------
  ; ARGUMENTS
  ; ---------
  ; 1. stpos        offset from src start address
  ; 2. src          start address of data to search
  ; 3. slen         1 based length of src
  ; 4. patn         address of pattern to search for
  ; 5. plen         1 based length of pattern

  ; RETURN VALUES
  ; -------------
  ; 0 or greater    OFFSET of matched pattern
  ; -1              pattern not found
  ; -2              pattern > source length
  ; -3              start OFFSET > last valid search position
  ; ---------------------------------------------------------

    push ebx
    push esi
    push edi

    mov eax, slen                   ; load source length
    sub eax, plen                   ; calculate last search position
    js errquit1                     ; if patn > src length
    cmp stpos, eax
    jg errquit2                     ; if startpos > last sch position

    mov ecx, eax                    ; store last valid sch position

    mov esi, src                    ; load the source address in ESI
    mov edi, patn                   ; load the pattern address in EDI
    movzx ebx, BYTE PTR [edi]       ; get the 1st byte of patn

    add ecx, esi                    ; add ESI to it for exit count in ESI
    sub ecx, 1                      ; correct for zero based scanloop
    sub plen, 1                     ; correct for zero based match loop counter
    sub esi, 1
    add esi, stpos                  ; add the starting offset

  ; ---------------------------------
  scanloop:
    add esi, 1
    cmp [esi], bl                   ; cmp current byte to patn 1st byte
    je matchloop                    ; branch to match loop
    cmp esi, ecx                    ; cmp ESI to last valid search position
    jle scanloop                    ; loop back if less than or equal
  ; ---------------------------------
    jmp nomatch

  matchloop:
    or edx, -1                      ; use edx as the index for matching
  ; =================================
  mloop:
    add edx, 1
    movzx eax, BYTE PTR [esi+edx]
    cmp al, [edi+edx]
    jne scanloop                    ; exit on mismatch within valid range
    cmp edx, plen
    jne mloop
  ; =================================
  match:
    sub esi, src
    mov eax, esi
    jmp quit

  nomatch:
    mov eax, -1
    jmp quit

  errquit1:
    mov eax, -2
    jmp quit

  errquit2:
    mov eax, -3
    jmp quit

  quit:
    pop edi
    pop esi
    pop ebx

    ret

bsearch endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

end start

Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

ramguru

as I said earlier it still has a bug of going past valid address, here's appropriate data to expose the problem:

    .data
      src_str db "aaaaaaaaaaaaaabc"
              db " very"
              db " bad"
      pat_str1 db "abc"
      pat_str2 db "abc very"
      pat_str3 db "abc very bad"
     
    .code

start:
   
    call main
    inkey
    exit

main proc

    fn bsearch, 0, ADDR src_str, 16, ADDR pat_str1, 3
    print str$(eax)," is OFFSET of 'abc'",13,10
    fn bsearch, 0, ADDR src_str, 16, ADDR pat_str2, 8
    print str$(eax)," is OFFSET of 'abc very'",13,10
    fn bsearch, 0, ADDR src_str, 16, ADDR pat_str3, 12
    print str$(eax)," is OFFSET of 'abc very bad'",13,10



If the algo keeps finding first pattern byte
code block

    cmp esi, ecx                    ; cmp ESI to last valid search position
    jle scanloop                    ; loop back if less than or equal
  ; ---------------------------------
    jmp nomatch

is never accessed or accessed too early

hutch--

OK, this version looks like it does the job. I would like to thank ramguru for providing the test conditions, it makes up for the smartarse wisecracks. Just remember I don't get paid for writing this stuff.


IF 0  ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
                      Build this template with "CONSOLE ASSEMBLE AND LINK"
ENDIF ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    include \masm32\include\masm32rt.inc

    bsearch PROTO :DWORD,:DWORD,:DWORD,:DWORD,:DWORD

    .data
      alphab db "abcdefghijklmnopqrstuvwxyz0"
      alpha dd alphab

      longpattern db "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
      longp dd longpattern
      llong dd LENGTHOF longpattern

      repeat_pattern db "bc..abc....abc........abc...abc......abc...abc........ab"
      rpt  dd repeat_pattern
      lrpt dd LENGTHOF repeat_pattern

      src_str db "aaaaaaaaaaaaaabc"
              db " very"
              db " bad"
      pat_str1 db "abc"
      pat_str2 db "abc very"
      pat_str3 db "abc very bad"

    .code

start:
   
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    call main
    inkey
    exit

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

main proc

    LOCAL cloc  :DWORD
    LOCAL pmem  :DWORD
    LOCAL pstr  :DWORD

    push ebx
    push esi
    push edi

    fn bsearch, 0, ADDR src_str, 16, ADDR pat_str1, 3
    print str$(eax)," is OFFSET of 'abc'",13,10

    fn bsearch, 0, ADDR src_str, 16, ADDR pat_str2, 8
    print str$(eax)," is OFFSET of 'abc very'",13,10

    fn bsearch, 0, ADDR src_str, 16, ADDR pat_str3, 12
    print str$(eax)," is OFFSET of 'abc very bad'",13,10

    ;;; jmp bye


    fn bsearch,0,alpha,26,"abc",3
    print str$(eax)," OFFSET lead bytes",13,10

    fn bsearch,0,alpha,26,"xyz",3
    print str$(eax)," OFFSET trail bytes",13,10

    fn bsearch,0,alpha,26,"xxx",3
    print str$(eax)," no match test",13,10

    fn bsearch,0,alpha,26,"xyz0",4
    print str$(eax)," end overrun test",13,10

    fn bsearch,0,alpha,26,"xy0",3
    print str$(eax)," end underrun test",13,10

    fn bsearch,0,alpha,26,longp,52
    print str$(eax)," pattern too long test",13,10

    fn bsearch,64,alpha,26,"bcde",4
    print str$(eax)," start out of range test",13,10,13,10

  ; =====================================

    mov cloc, 0

  lpsch:
    mov cloc, rv(bsearch,cloc,rpt,lrpt,"abc",3) ; get OFFSET of next instance of pattern
    cmp cloc, 0
    jl done                                     ; anything below zero is not a match
    print "'abc' at OFFSET "
    print str$(cloc),13,10
    add cloc, 1                                 ; increment
    jmp lpsch

  done:
    print "Thats all folks",13,10,13,10

  ; =====================================

    print "Benchmark algo",13,10,13,10

  ; ------------------------------------------------------------
  ; fill memory with 1 million copies of the following byte data
  ; ------------------------------------------------------------

    sas pstr, "...abc...bc...ab...abc...bc..."  ; 30 bytes

    mov pmem, alloc(1024*1024*30)               ; allocate space for 1 million copies

  ; ---------------------
  ; write the test buffer
  ; ---------------------
    xor ebx, ebx
  stloop:
    mov esi, pstr
    mov edi, pmem
    mov ecx, 30
    rep movsb

    add edi, 30

    add ebx, 1
    cmp ebx, 1024*1024
    jl stloop
  ; ---------------------

    invoke GetTickCount
    push eax

    mov cloc, 0                 ; set current location counter to zero
    mov esi, pmem               ; memory address in ESI
    mov edi, 100                ; loop complete test 100 times

  ; =============================

  oloop:                        ; outer loop

  ; -----------------------------

  timloop:
    mov cloc, rv(bsearch,cloc,pmem,1024*1024*30,"abc",3)
    cmp cloc, 0
    jl timout
    add cloc, 1
    jmp timloop
  timout:

  ; -----------------------------

    sub edi, 1
    jnz oloop

  ; =============================

    invoke GetTickCount
    pop ecx
    sub eax, ecx

    push eax
    print "3 gigabyte scanned in "
    pop eax
    print str$(eax)," ms",13,10

    free pmem

  bye:

    pop edi
    pop esi
    pop ebx

    ret

main endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE

align 16

bsearch proc stpos:DWORD,src:DWORD,slen:DWORD,patn:DWORD,plen:DWORD

  ; ---------------------------------------------------------
  ; ARGUMENTS
  ; ---------
  ; 1. stpos        offset from src start address
  ; 2. src          start address of data to search
  ; 3. slen         1 based length of src
  ; 4. patn         address of pattern to search for
  ; 5. plen         1 based length of pattern

  ; RETURN VALUES
  ; -------------
  ; 0 or greater    OFFSET of matched pattern
  ; -1              pattern not found
  ; -2              pattern > source length
  ; -3              start OFFSET > last valid search position
  ; ---------------------------------------------------------
    push ebx
    push esi
    push edi
    push ebp

    mov ecx, [esp+24][4]            ; load source length
    sub ecx, [esp+32][4]            ; calculate last search position
    js errquit1                     ; if pattern > source length
    cmp [esp+16][4], ecx
    jg errquit2                     ; if startpos > last sch position
    mov esi, [esp+20][4]            ; load the source address in ESI
    mov edi, [esp+28][4]            ; load the pattern address in EDI
    movzx ebx, BYTE PTR [edi]       ; get the 1st byte of the pattern
    add ecx, esi                    ; add ESI to the exit count
    sub DWORD PTR [esp+32][4], 1    ; correct for zero based match loop counter
    sub esi, 1
    add esi, DWORD PTR [esp+16][4]  ; add the starting offset
    mov ebp, [esp+32][4]            ; put pattern length into EBP
  ; ---------------------------------
  scanloop:
    add esi, 1
    cmp esi, ecx                    ; exit on end location
    jg nomatch
    cmp [esi], bl                   ; cmp current byte to 1st byte in pattern
    jne scanloop
  ; ---------------------------------
  matchloop:
    or edx, -1                      ; use edx as the index for matching
  ; =================================
  mloop:
    add edx, 1
    mov al, [esi+edx]
    cmp al, [edi+edx]
    jne scanloop                    ; exit on mismatch within valid range
    cmp edx, ebp                    ; compare counter against pattern length
    jne mloop
  ; =================================
    sub esi, [esp+20][4]            ; on match, calculate length
    mov eax, esi                    ; return it in EAX
    jmp quit
  nomatch:
    mov eax, -1
    jmp quit
  errquit1:
    mov eax, -2
    jmp quit
  errquit2:
    mov eax, -3
    jmp quit

  quit:
    pop ebp
    pop edi
    pop esi
    pop ebx

    ret 20

bsearch endp

OPTION PROLOGUE:PrologueDef
OPTION EPILOGUE:EpilogueDef

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

end start
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

ramguru

good job :U
done with bug reports for now :>