News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

IndexOf procedure

Started by Chambao, July 22, 2005, 05:07:08 PM

Previous topic - Next topic

Chambao

Hi

Does anybody have a fast "IndexOf" procedure
I am scanning for a single byte, and I am using this simple procedure right now, but when the buffer is large it is very slow.
I tried InString from the masm package but it is slower than the code below :s
I have seen your strLen optimizations, too bad it looks only for null bytes :(


find proc lpData:DWORD, char:DWORD
xor eax, eax
mov edx, lpData
mov ecx, char
.while byte ptr [edx+eax] != cl
    inc eax
.endw
ret
find endp


thank you

Randall Hyde

Quote from: Chambao on July 22, 2005, 05:07:08 PM
Hi

Does anybody have a fast "IndexOf" procedure
I am scanning for a single byte, and I am using this simple procedure right now, but when the buffer is large it is very slow.
I tried InString from the masm package but it is slower than the code below :s
I have seen your strLen optimizations, too bad it looks only for null bytes :(


find proc lpData:DWORD, char:DWORD
xor eax, eax
mov edx, lpData
mov ecx, char
.while byte ptr [edx+eax] != cl
    inc eax
.endw
ret
find endp


thank you

Replicate the char you are searching for four times in a register. Subtract that register value from a dword fetched from memory. Use the fast scan for zero algorithm.
Cheers,
Randy Hyde

Codewarp

This is my replacement for the memchr( ) routine, one that uses only integer cpu instructions--much faster than a simple byte seach.  It returns the address of the character found, or zero.  I have an mmx/sse version of this that is much faster on distant searches, which I can post if I see sufficient interest for it.  MemChr2( ), below, is in VC++ form, but maps easily to masm.


// search for charval in src string
// uses aligned dword loads for all memory access
// fast performance: strings of 1/10/100/1000 bytes require 18/24/114/866 cycles (Athlon64)

__declspec( naked ) void* MemChr2 (const void *src, int charval, size_t count) {
    _asm {
            push    ebx
            push    edi
            mov     edx, [esp + 12]         ; point edx to start of string
            mov     ebx, [esp + 16]         ; ebx = 8-bit charval
            mov     edi, [esp + 20]         ; edi holds the byte count
            and     ebx, 0xff
            add     edi, edx                ; edi points past the last byte to search
            imul     ebx, 1010101h          ; make 4 copies of charval in ebx (slow on p4)
            test    edx, 3                  ; test src alignment
            jnz     fixalign                ; branch if misaligned
            align   8                       ; this aligns next 2 labels--don't mess with the code in between
dwscan:     cmp     edx, edi                ; compare edx with final address
            jae     past                    ; return fail (0) after all requested bytes have been read
            mov     eax, [edx]              ; load next four bytes
            xor     eax, ebx                ; compare the character we are looking for with each byte
resume:     add     edx, 4                  ; advance ptr
            lea     ecx, [eax - 1010101h]   ; subtract 1 from each byte and put into ecx
            not     eax                     ; for each byte in ecx perform: (diff-1) & ~diff & 80h
            and     eax, 80808080h
            and     eax, ecx                ; all unequal bytes have bit7 == 0
            jz      dwscan                  ; repeat while no equality found

            test     eax, 8080h             ; test first two bytes
            jz      upper2                  ; jmp if not found in the lower 2 bytes
            shr     eax, 8                  ; set carry from bit7 of 1st byte
            sbb     edx, 3                  ; edx = (edx-4) + (1-carry)
            jmp     result

upper2:     shl     eax, 9                  ; set carry from bit7 of 3rd byte
            sbb     edx, 1                  ; edx = (edx-2) + (1-carry)
result:     mov     eax, edx                ; return as the result
            cmp     edx, edi
            jae     past                    ; return fail (0), on match past the end
            pop     edi
            pop     ebx
            ret

past:       mov     eax, 0
            pop     edi
            pop     ebx
            ret

            align       8
fixalign:   mov       ecx, edx
            and     edx, not 3              ; edx points to aligned addr
            and     ecx, 3                  ; ecx = bytes to skip
            shl      ecx, 3                 ; ecx = bits to skip
            mov     eax, 1
            shl     eax, cl                 ; put cl 1's into eax from the bottom up
            mov     ecx, [edx]              ; load first dword into ecx
            sub     eax, 1
            xor     ecx, ebx                ; compare the character we are looking for with each byte
            or      eax, ecx                ; force skip bytes into unequal status (to skip them)
            jmp     resume                  ; start up in the aligned search
    }
}


Chambao

thank you very much Randall Hyde and Codewarp

find1 proc uses ebx lpStr:DWORD, char:DWORD
mov ebx, char
mov eax, lpStr
imul ebx, 1010101h

.repeat
mov edx, [eax]
add eax, 4
xor edx, ebx
; sub edx, ebx
lea ecx, [edx-01010101h]
not edx
and edx, ecx
and edx, 80808080h
.until !zero?

sub eax, 5
.repeat
inc eax
ror edx, 8
.until carry?
sub eax, lpStr
ret
find1 endp