News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

What exactly is wrong with Instring?

Started by ecube, July 10, 2010, 01:06:41 AM

Previous topic - Next topic

ecube

I've seen it described on other sites as broken or bugged, on more than one occasion, so what's the problem with it? Here's the code:


InString proc startpos:DWORD,lpSource:DWORD,lpPattern:DWORD

  ; ------------------------------------------------------------------
  ; InString searches for a substring (lpPattern) in a larger string
  ; (lpSource) and, if it is found, returns its position in eax.
  ; Both lengths are obtained with StrLen.
  ;
  ; It uses a one (1) based character index (1st character is 1,
  ; 2nd is 2 etc...) for both the "StartPos" parameter and the returned
  ; character position.
  ;
  ; Return Values.
  ; If the function succeeds, it returns the 1 based index of the start
  ; of the substring.
  ;  0 = no match found (also returned for an empty pattern)
  ; -1 = substring same length or longer than main string
  ; -2 = "StartPos" parameter out of range (less than 1 or longer than
  ; main string)
  ;
  ; EBX, ESI and EDI are preserved. EAX, ECX and EDX are modified.
  ;
  ; Register use inside the scan loop:
  ;   AL  = first byte of the pattern
  ;   ESI = lpSource + (sLen - pLen + 1), one past the last valid start
  ;   ECX = negative offset from ESI; counts up, 0 means "window used up"
  ;   EDI = lpPattern
  ;   EBX = address of the current candidate in the source
  ;   EDX = number of pattern bytes still to compare (counts down)
  ; ------------------------------------------------------------------

    LOCAL sLen:DWORD
    LOCAL pLen:DWORD

    push ebx
    push esi
    push edi

    invoke StrLen,lpSource
    mov sLen, eax           ; source length
    invoke StrLen,lpPattern
    mov pLen, eax           ; pattern length (EAX keeps it for the checks below)

    cmp startpos, 1
    jge @F
    mov eax, -2
    jmp isOut               ; exit if startpos not 1 or greater
  @@:

    dec startpos            ; correct from 1 to 0 based index

    cmp  eax, sLen
    jl @F
    mov eax, -1
    jmp isOut               ; exit if pattern same length or longer than source
  @@:

    sub sLen, eax           ; sLen = number of valid start positions
    inc sLen                ; (don't read past string end)

    mov ecx, sLen
    cmp ecx, startpos
    jg @F
    mov eax, -2
    jmp isOut               ; exit if startpos is past end
  @@:

  ; An empty pattern would make Test_Match start with a zero count and
  ; read the bytes in front of both strings, so report "no match" here.
    cmp pLen, 0
    je No_Match

  ; ----------------
  ; setup loop code
  ; ----------------
    mov esi, lpSource
    mov edi, lpPattern
    mov al, [edi]           ; get 1st char in pattern

    add esi, ecx            ; ESI = one past the last valid start position
    neg ecx                 ; invert sign
    add ecx, startpos       ; ECX = startpos - sLen (negative offset)

    jmp Scan_Loop

    align 16

  ; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  Pre_Scan:
    inc ecx                 ; start on next byte
    jns No_Match            ; candidate was the last valid start: stop here
                            ; instead of scanning beyond the search window

  Scan_Loop:
    cmp al, [esi+ecx]       ; scan for 1st byte of pattern
    je Pre_Match            ; test if it matches
    inc ecx
    js Scan_Loop            ; exit on sign inversion

    jmp No_Match

  Pre_Match:
    lea ebx, [esi+ecx]      ; put current scan address in EBX
    mov edx, pLen           ; put pattern length into EDX

  Test_Match:
    mov ah, [ebx+edx-1]     ; load last byte of pattern length in main string
    cmp ah, [edi+edx-1]     ; compare it with last byte in pattern
    jne Pre_Scan            ; jump back on mismatch
    dec edx
    jnz Test_Match          ; 0 = match, fall through on match

  ; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  Match:
    add ecx, sLen           ; negative offset + window size = 0 based index
    mov eax, ecx
    inc eax                 ; convert to 1 based index
    jmp isOut
   
  No_Match:
    xor eax, eax

  isOut:
    pop edi
    pop esi
    pop ebx

    ret

InString endp

ecube

Also, I've had issues with BinSearch: for some reason it doesn't always find characters in a string correctly (ones that are supported according to the comments), so I wrote this. It's probably not faster, but it seems to be correct. It returns -1 if the pattern is not found; otherwise it returns the start position, like BinSearch does.

Masm version

BinSearchv proc uses ebx esi edi istart,iBuf,iLen,iSearchstr,iSearchlen

; ----------------------------------------------------------------------
; BinSearchv searches the iLen bytes at iBuf for the iSearchlen bytes at
; iSearchstr, beginning at the zero based byte offset istart.
;
; Returns in eax:
;   the zero based offset (from iBuf) of the first match at or after
;   istart, or
;   -1 if there is no match, istart is negative or not below iLen,
;      iSearchlen is zero or negative, or the pattern does not fit
;      between istart and the end of the buffer.
;
; Every candidate start position is tested, so overlapping prefixes
; ("aab" in "aaab") and a match ending on the last byte are found, and
; no byte at or beyond iBuf+iLen is read.
;
; EBX, ESI and EDI are preserved. EAX, ECX and EDX are modified.
;
; Register use:
;   ESI = address of the current candidate start
;   EAX = address of the last candidate start where the pattern fits
;   EDI = iSearchstr
;   EDX = iSearchlen
;   ECX = number of pattern bytes matched at the current candidate
; ----------------------------------------------------------------------

    mov ecx, istart
    test ecx, ecx
    js @Notfound                ; negative start offset

    mov edx, iSearchlen
    test edx, edx
    jle @Notfound               ; nothing to search for

    cmp ecx, iLen
    jge @Notfound               ; start is at or past the end of the buffer

    mov eax, iLen
    sub eax, edx                ; eax = last index where the pattern still fits
    cmp ecx, eax
    jg @Notfound                ; pattern does not fit after istart

    mov esi, iBuf
    mov edi, iSearchstr
    add eax, esi                ; eax -> last candidate start
    add esi, ecx                ; esi -> first candidate start

@mainloop:
    xor ecx, ecx                ; restart the comparison at pattern byte 0

@checkloop:
    mov bl, byte ptr [esi+ecx]
    cmp bl, byte ptr [edi+ecx]
    jne @advance
    inc ecx
    cmp ecx, edx
    jb @checkloop               ; more pattern bytes to compare

@found:
    mov eax, esi
    sub eax, iBuf               ; convert address back to a zero based offset
    ret

@advance:
    inc esi                     ; try the very next start position
    cmp esi, eax
    jbe @mainloop

@Notfound:
    mov eax, -1
    ret
BinSearchv endp


Goasm Version

BinSearchv FRAME istart,iBuf,iLen,iSearchstr,iSearchlen
USES ebx,esi,edi

; ----------------------------------------------------------------------
; BinSearchv searches the iLen bytes at iBuf for the iSearchlen bytes at
; iSearchstr, beginning at the zero based byte offset istart.
;
; Returns in eax:
;   the zero based offset (from iBuf) of the first match at or after
;   istart, or
;   -1 if there is no match, istart is negative or not below iLen,
;      iSearchlen is zero or negative, or the pattern does not fit
;      between istart and the end of the buffer.
;
; Every candidate start position is tested, so overlapping prefixes
; ("aab" in "aaab") and a match ending on the last byte are found, and
; no byte at or beyond iBuf+iLen is read.
;
; EBX, ESI and EDI are preserved. EAX, ECX and EDX are modified.
;
; Register use:
;   ESI = address of the current candidate start
;   EAX = address of the last candidate start where the pattern fits
;   EDI = iSearchstr
;   EDX = iSearchlen
;   ECX = number of pattern bytes matched at the current candidate
; ----------------------------------------------------------------------

mov ecx,[istart]
test ecx,ecx
js > @Notfound              ; negative start offset

mov edx,[iSearchlen]
test edx,edx
jle > @Notfound             ; nothing to search for

cmp ecx,[iLen]
jge > @Notfound             ; start is at or past the end of the buffer

mov eax,[iLen]
sub eax,edx                 ; eax = last index where the pattern still fits
cmp ecx,eax
jg > @Notfound              ; pattern does not fit after istart

mov esi,[iBuf]
mov edi,[iSearchstr]
add eax,esi                 ; eax -> last candidate start
add esi,ecx                 ; esi -> first candidate start

@mainloop:
xor ecx,ecx                 ; restart the comparison at pattern byte 0

@checkloop:
mov bl,B[esi+ecx]
cmp bl,B[edi+ecx]
jne > @advance
inc ecx
cmp ecx,edx
jb < @checkloop             ; more pattern bytes to compare

@found:
mov eax,esi
sub eax,[iBuf]              ; convert address back to a zero based offset
ret

@advance:
inc esi                     ; try the very next start position
cmp esi,eax
jbe < @mainloop

@Notfound:
mov eax,-1
ret
ENDF 

hutch--

Cube,

I would be interested to see if InString when used correctly found what you are looking for in the main string. It can be tweaked to make it a bit faster but its rarely ever a problem with its target usage.
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

jj2007

Quote from: E^cube on July 10, 2010, 01:06:41 AM
i've seen on other sites about it being broken or bugged, on more than one occasion, so whats the problem with it?

Can you show these links? I know only this one, and that has been fixed long ago...

ecube

Quote from: jj2007 on July 10, 2010, 12:27:58 PM
Quote from: E^cube on July 10, 2010, 01:06:41 AM
i've seen on other sites about it being broken or bugged, on more than one occasion, so whats the problem with it?

Can you show these links? I know only this one, and that has been fixed long ago...

I don't know, it's been a while. There's http://madwizard.org/programming/snippets?id=57 and http://madwizard.org/programming/snippets?id=48, but neither says the original is broken, so never mind.