What exactly is wrong with Instring?

Started by ecube, July 10, 2010, 01:06:41 AM

I've seen claims on other sites, on more than one occasion, that it is broken or bugged, so what's the problem with it? Here's the code:

InString proc startpos:DWORD,lpSource:DWORD,lpPattern:DWORD

  ; ------------------------------------------------------------------
  ; InString searches for a substring in a larger string and if it is
  ; found, it returns its position in eax.
  ; It uses a one (1) based character index (1st character is 1,
  ; 2nd is 2 etc...) for both the "StartPos" parameter and the returned
  ; character position.
  ; Return Values.
  ; If the function succeeds, it returns the 1 based index of the start
  ; of the substring.
  ;  0 = no match found
  ; -1 = substring same length or longer than main string
  ; -2 = "StartPos" parameter out of range (less than 1 or longer than
  ; main string)
  ;
  ; Registers: EBX, ESI and EDI are saved and restored. EAX, ECX and
  ; EDX are not preserved.
  ; NOTE: a zero length pattern is not handled specially. Test_Match
  ; would start with EDX = 0 and read the byte in front of both
  ; pointers, so callers should pass a non-empty pattern.
  ; ------------------------------------------------------------------

    LOCAL sLen:DWORD        ; source length, later the number of scan positions
    LOCAL pLen:DWORD        ; pattern length

    push ebx
    push esi
    push edi

    invoke StrLen,lpSource
    mov sLen, eax           ; source length
    invoke StrLen,lpPattern
    mov pLen, eax           ; pattern length

    cmp startpos, 1
    jge @F
    mov eax, -2
    jmp isOut               ; exit if startpos not 1 or greater
  @@:

    dec startpos            ; correct from 1 to 0 based index

    cmp  eax, sLen          ; EAX still holds the pattern length here
    jl @F
    mov eax, -1
    jmp isOut               ; exit if pattern same length or longer than source
  @@:

    sub sLen, eax           ; don't read past string end
    inc sLen                ; sLen = number of valid start positions

    mov ecx, sLen
    cmp ecx, startpos
    jg @F
    mov eax, -2
    jmp isOut               ; exit if startpos is past end
  @@:

  ; ----------------
  ; setup loop code
  ; ----------------
  ; ESI = address one past the last valid start position
  ; ECX = negative index that counts up towards zero
  ; EDI = pattern address, AL = first pattern byte
  ; ----------------
    mov esi, lpSource
    mov edi, lpPattern
    mov al, [edi]           ; get 1st char in pattern

    add esi, ecx            ; add source length
    neg ecx                 ; invert sign
    add ecx, startpos       ; add starting offset

    jmp Scan_Loop

    align 16

  ; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  Pre_Scan:
    inc ecx                 ; start on next byte
    jns No_Match            ; index reached zero, no start positions left,
                            ; stop here instead of scanning past the end

  Scan_Loop:
    cmp al, [esi+ecx]       ; scan for 1st byte of pattern
    je Pre_Match            ; test if it matches
    inc ecx
    js Scan_Loop            ; exit on sign inversion

    jmp No_Match

  Pre_Match:
    lea ebx, [esi+ecx]      ; put current scan address in EBX
    mov edx, pLen           ; put pattern length into EDX

  Test_Match:
    mov ah, [ebx+edx-1]     ; load last byte of pattern length in main string
    cmp ah, [edi+edx-1]     ; compare it with last byte in pattern
    jne Pre_Scan            ; jump back on mismatch
    dec edx
    jnz Test_Match          ; 0 = match, fall through on match

  ; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  Match:
    add ecx, sLen           ; negative index + position count = 0 based offset
    mov eax, ecx
    inc eax                 ; convert to 1 based index for the caller
    jmp isOut

  No_Match:
    xor eax, eax

  isOut:
    pop edi
    pop esi
    pop ebx

    ret

InString endp


Also, I've had issues with BinSearch: for some reason it doesn't always find chars in a string correctly (ones that are supported according to the comments), so I wrote this. It's probably not faster, but it seems to be correct; it returns -1 if not found, otherwise it returns the start position like BinSearch does.

Masm version

; ----------------------------------------------------------------------
; BinSearchv - search a byte buffer for a byte pattern (MASM, 32 bit).
;
;   istart     = zero based offset in iBuf where the search begins
;   iBuf       = address of the buffer to search
;   iLen       = length of the buffer in bytes
;   iSearchstr = address of the pattern
;   iSearchlen = length of the pattern in bytes
;
; Returns in EAX the zero based offset of the first match, measured from
; the start of iBuf, or -1 when there is no match, the pattern is empty,
; istart is negative, or the pattern does not fit between istart and the
; end of the buffer.
;
; ESI and EDI are preserved through USES. EAX, ECX and EDX are clobbered.
; ----------------------------------------------------------------------
BinSearchv proc uses esi edi istart,iBuf,iLen,iSearchstr,iSearchlen

    mov ecx, iSearchlen
    test ecx, ecx
    jle @Notfound               ; nothing to look for

    mov eax, istart
    test eax, eax
    js @Notfound                ; negative start offset

    mov ecx, iLen
    sub ecx, iSearchlen         ; ecx = last offset where the pattern still fits
    sub ecx, eax                ; ecx = candidates after istart (signed)
    jl @Notfound                ; pattern does not fit behind istart
    inc ecx                     ; ecx = number of candidate positions, always >= 1

    mov esi, iBuf
    add esi, eax                ; esi = current candidate address
    mov edi, iSearchstr         ; edi = pattern address

@mainloop:
    xor edx, edx                ; edx = index into pattern and candidate

@checkloop:
    mov al, byte ptr [esi+edx]
    cmp al, byte ptr [edi+edx]
    jne @nextpos                ; mismatch, try the next start position
    inc edx
    cmp edx, iSearchlen
    jl @checkloop               ; more pattern bytes to verify

@found:
    mov eax, esi
    sub eax, iBuf               ; address back to zero based offset
    ret

@nextpos:
    inc esi                     ; advance by one byte only so that
    dec ecx                     ; overlapping prefixes are not skipped
    jnz @mainloop

@Notfound:
    mov eax, -1
    ret

BinSearchv endp

Goasm Version

; ----------------------------------------------------------------------
; BinSearchv - search a byte buffer for a byte pattern (GoAsm, 32 bit).
;
;   istart     = zero based offset in iBuf where the search begins
;   iBuf       = address of the buffer to search
;   iLen       = length of the buffer in bytes
;   iSearchstr = address of the pattern
;   iSearchlen = length of the pattern in bytes
;
; Returns in EAX the zero based offset of the first match, measured from
; the start of iBuf, or -1 when there is no match, the pattern is empty,
; istart is negative, or the pattern does not fit between istart and the
; end of the buffer.
;
; ESI and EDI are saved and restored. EAX, ECX and EDX are clobbered.
; ----------------------------------------------------------------------
BinSearchv FRAME istart,iBuf,iLen,iSearchstr,iSearchlen
push esi                        ; callee-saved registers used below
push edi

mov ecx,[iSearchlen]
test ecx,ecx
jle > @Notfound                 ; nothing to look for

mov eax,[istart]
test eax,eax
js > @Notfound                  ; negative start offset

mov ecx,[iLen]
sub ecx,[iSearchlen]            ; ecx = last offset where the pattern still fits
sub ecx,eax                     ; ecx = candidates after istart (signed)
jl > @Notfound                  ; pattern does not fit behind istart
inc ecx                         ; ecx = number of candidate positions, always >= 1

mov esi,[iBuf]
add esi,eax                     ; esi = current candidate address
mov edi,[iSearchstr]            ; edi = pattern address

@mainloop:
xor edx,edx                     ; edx = index into pattern and candidate

@checkloop:
mov al,B[esi+edx]
cmp al,B[edi+edx]
jne > @nextpos                  ; mismatch, try the next start position
inc edx
cmp edx,[iSearchlen]
jl < @checkloop                 ; more pattern bytes to verify

@found:
mov eax,esi
sub eax,[iBuf]                  ; address back to zero based offset
jmp > @done

@nextpos:
inc esi                         ; advance by one byte only so that
dec ecx                         ; overlapping prefixes are not skipped
jnz < @mainloop

@Notfound:
mov eax,-1

@done:
pop edi
pop esi
ret
ENDF



I would be interested to see whether InString, when used correctly, found what you were looking for in the main string. It can be tweaked to make it a bit faster, but it's rarely ever a problem with its target usage.
Can you show these links? I know only this one, and that has been fixed long ago...


I don't know, it's been a while. There's this and this, but neither says the original is broken, so never mind.