The MASM Forum Archive 2004 to 2012

General Forums => The Campus => Topic started by: ecube on July 10, 2010, 01:06:41 AM

Title: What exactly is wrong with Instring?
Post by: ecube on July 10, 2010, 01:06:41 AM
I've seen claims on other sites, on more than one occasion, that it is broken or bugged, so what's the problem with it? Here's the code:


InString proc startpos:DWORD,lpSource:DWORD,lpPattern:DWORD

  ; ------------------------------------------------------------------
  ; InString searches for a substring in a larger string and if it is
  ; found, it returns its position in eax.
  ;
  ; It uses a one (1) based character index (1st character is 1,
  ; 2nd is 2 etc...) for both the "StartPos" parameter and the returned
  ; character position.
  ;
  ; Both strings must be zero terminated; their lengths are obtained
  ; with StrLen.
  ;
  ; Return Values.
  ; If the function succeeds, it returns the 1 based index of the start
  ; of the substring.
  ;  0 = no match found (also returned when the pattern is empty)
  ; -1 = substring same length or longer than main string
  ; -2 = "StartPos" parameter out of range (less than 1, or past the
  ;      last position at which the pattern could still fit)
  ;
  ; EBX, ESI and EDI are saved and restored. EAX, ECX and EDX are
  ; modified.
  ; ------------------------------------------------------------------

    LOCAL sLen:DWORD
    LOCAL pLen:DWORD

    push ebx
    push esi
    push edi

    invoke StrLen,lpSource
    mov sLen, eax           ; source length
    invoke StrLen,lpPattern
    mov pLen, eax           ; pattern length (also kept in EAX below)

    cmp startpos, 1
    jge @F
    mov eax, -2
    jmp isOut               ; exit if startpos not 1 or greater
  @@:

    dec startpos            ; correct from 1 to 0 based index

    cmp  eax, sLen
    jl @F
    mov eax, -1
    jmp isOut               ; exit if pattern same length or longer than source
  @@:

    sub sLen, eax           ; sLen = number of start positions at which
    inc sLen                ; the whole pattern still fits in the source

    mov ecx, sLen
    cmp ecx, startpos
    jg @F
    mov eax, -2
    jmp isOut               ; exit if startpos is past the last valid start
  @@:

  ; An empty pattern would enter Test_Match with a zero count and read
  ; the bytes in front of both strings, so report it as "no match".
    cmp pLen, 0
    je No_Match

  ; ----------------
  ; setup loop code
  ; ----------------
    mov esi, lpSource
    mov edi, lpPattern
    mov al, [edi]           ; get 1st char in pattern

  ; ESI is set one past the last valid start position and ECX runs from
  ; a negative offset up to zero, so the sign flag ends the scan.
    add esi, ecx            ; add number of valid start positions
    neg ecx                 ; invert sign
    add ecx, startpos       ; add starting offset

    jmp Scan_Loop

    align 16

  ; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  Pre_Scan:
    inc ecx                 ; start on next byte
    jns No_Match            ; stop once past the last valid start, otherwise
                            ; Test_Match could read beyond the terminator

  Scan_Loop:
    cmp al, [esi+ecx]       ; scan for 1st byte of pattern
    je Pre_Match            ; test if it matches
    inc ecx
    js Scan_Loop            ; exit on sign inversion

    jmp No_Match

  Pre_Match:
    lea ebx, [esi+ecx]      ; put current scan address in EBX
    mov edx, pLen           ; put pattern length into EDX (always >= 1 here)

  Test_Match:               ; compare candidate and pattern from last byte to first
    mov ah, [ebx+edx-1]     ; load byte from main string
    cmp ah, [edi+edx-1]     ; compare it with the same byte in pattern
    jne Pre_Scan            ; jump back on mismatch
    dec edx
    jnz Test_Match          ; 0 = match, fall through on match

  ; @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

  Match:
    add ecx, sLen           ; negative offset -> 0 based position
    mov eax, ecx
    inc eax                 ; convert to 1 based position
    jmp isOut
   
  No_Match:
    xor eax, eax

  isOut:
    pop edi
    pop esi
    pop ebx

    ret

InString endp
Title: Re: What exactly is wrong with Instring?
Post by: ecube on July 10, 2010, 01:17:16 AM
Also, I've had issues with BinSearch: for some reason it doesn't always find characters in a string correctly (ones that are supported according to the comments), so I wrote this. It's probably not faster, but it seems to be correct. It returns -1 if not found; otherwise it returns the start position, like BinSearch does.

Masm version

BinSearchv proc uses ebx esi edi istart:DWORD,iBuf:DWORD,iLen:DWORD,iSearchstr:DWORD,iSearchlen:DWORD

  ; ------------------------------------------------------------------
  ; BinSearchv searches the iLen bytes at iBuf for the iSearchlen bytes
  ; at iSearchstr, starting at the zero based offset istart. The data
  ; is treated as raw bytes, a zero byte has no special meaning.
  ;
  ; Return Values.
  ; eax = zero based offset of the first match, counted from iBuf
  ; eax = -1 if there is no match, if iSearchlen is not greater than
  ;       zero, if istart is negative, or if the pattern cannot fit
  ;       between istart and the end of the buffer
  ;
  ; No byte outside iBuf[0..iLen-1] or iSearchstr[0..iSearchlen-1] is
  ; read. EBX, ESI and EDI are preserved by the "uses" clause; EAX,
  ; ECX and EDX are modified.
  ; ------------------------------------------------------------------

    mov eax, iSearchlen
    test eax, eax
    jle NotFound            ; nothing to search for

    mov ecx, istart         ; ECX = offset of the current candidate
    test ecx, ecx
    js NotFound             ; negative start offset

    mov edx, iLen
    sub edx, eax            ; EDX = last offset at which the pattern fits
    cmp ecx, edx
    jg NotFound             ; start too far in, or pattern longer than buffer

    mov esi, iBuf
    add esi, ecx            ; ESI -> current candidate
    mov edi, iSearchstr

  NextCandidate:
    xor ebx, ebx            ; EBX = index of the byte being compared

  CompareByte:
    mov al, [esi+ebx]
    cmp al, [edi+ebx]
    jne Advance
    inc ebx
    cmp ebx, iSearchlen
    jb CompareByte

    mov eax, ecx            ; every pattern byte matched
    ret

  Advance:
    inc esi                 ; retry exactly one byte after the candidate so
    inc ecx                 ; overlapping starts ("ab" in "aab") are not skipped
    cmp ecx, edx
    jle NextCandidate

  NotFound:
    mov eax, -1
    ret

BinSearchv endp


Goasm Version

BinSearchv FRAME istart,iBuf,iLen,iSearchstr,iSearchlen
; ------------------------------------------------------------------
; BinSearchv searches the iLen bytes at iBuf for the iSearchlen bytes
; at iSearchstr, starting at the zero based offset istart. The data
; is treated as raw bytes, a zero byte has no special meaning.
;
; Return Values.
; eax = zero based offset of the first match, counted from iBuf
; eax = -1 if there is no match, if iSearchlen is not greater than
;       zero, if istart is negative, or if the pattern cannot fit
;       between istart and the end of the buffer
;
; No byte outside iBuf[0..iLen-1] or iSearchstr[0..iSearchlen-1] is
; read. EBX, ESI and EDI are pushed on entry and popped at the single
; exit (@done); EAX, ECX and EDX are modified.
; ------------------------------------------------------------------
push ebx
push esi
push edi

mov eax,[iSearchlen]
test eax,eax
jle > @Notfound         ; nothing to search for

mov ecx,[istart]        ; ECX = offset of the current candidate
test ecx,ecx
js > @Notfound          ; negative start offset

mov edx,[iLen]
sub edx,eax             ; EDX = last offset at which the pattern fits
cmp ecx,edx
jg > @Notfound          ; start too far in, or pattern longer than buffer

mov esi,[iBuf]
add esi,ecx             ; ESI -> current candidate
mov edi,[iSearchstr]

@candidate:
xor ebx,ebx             ; EBX = index of the byte being compared

@compare:
mov al,[esi+ebx]
cmp al,[edi+ebx]
jne > @advance
inc ebx
cmp ebx,[iSearchlen]
jb < @compare

mov eax,ecx             ; every pattern byte matched
jmp > @done

@advance:
inc esi                 ; retry exactly one byte after the candidate so
inc ecx                 ; overlapping starts ("ab" in "aab") are not skipped
cmp ecx,edx
jle < @candidate

@Notfound:
mov eax,-1

@done:
pop edi
pop esi
pop ebx
ret
ENDF 
Title: Re: What exactly is wrong with Instring?
Post by: hutch-- on July 10, 2010, 11:57:50 AM
Cube,

I would be interested to see whether InString, when used correctly, found what you were looking for in the main string. It can be tweaked to make it a bit faster, but it's rarely ever a problem with its target usage.
Title: Re: What exactly is wrong with Instring?
Post by: jj2007 on July 10, 2010, 12:27:58 PM
Quote from: E^cube on July 10, 2010, 01:06:41 AM
i've seen on other sites about it being broken or bugged, on more than one occasion, so whats the problem with it?

Can you show these links? I know only this one (http://www.asmcommunity.net/board/index.php?action=printpage;topic=6320.0), and that has been fixed long ago...
Title: Re: What exactly is wrong with Instring?
Post by: ecube on July 12, 2010, 09:01:44 PM
Quote from: jj2007 on July 10, 2010, 12:27:58 PM
Quote from: E^cube on July 10, 2010, 01:06:41 AM
i've seen on other sites about it being broken or bugged, on more than one occasion, so whats the problem with it?

Can you show these links? I know only this one (http://www.asmcommunity.net/board/index.php?action=printpage;topic=6320.0), and that has been fixed long ago...

I don't know, it's been a while. There's this http://madwizard.org/programming/snippets?id=57 and http://madwizard.org/programming/snippets?id=48, but neither says the original is broken, so never mind.