Hi all,
I want to do a simple string comparison. I can write code for it, but I am sure it will not be as efficient as MASM's routines. I found CMPS, but I could not work out how to use it. If it is a good way to compare two strings, can anybody show me how to use it with a simple example? Otherwise, what would be a better and more efficient way?
Regards,
Danesh
There are numerous ways to compare two strings. The simplest one is to keep comparing byte by byte till you reach end of the string:
; Compare two zero-terminated byte strings one character at a time.
; esi walks string1, edi walks string2 (both advance via lodsb/scasb,
; which assumes the direction flag is clear).
; Falls through when the strings are equal; jumps to strings_not_equal otherwise.
        mov     esi, offset string1
        mov     edi, offset string2
cmp_next_char:
        lodsb                           ; al = next byte of string1, esi += 1
        test    al, al                  ; terminator of string1 reached?
        jz      cmp_first_ended
        scasb                           ; al vs next byte of string2, edi += 1
        jne     strings_not_equal       ; bytes differ
        jmp     cmp_next_char
cmp_first_ended:
        cmp     byte ptr [edi], 0       ; string2 must end at the same position
        jne     strings_not_equal
Here's another way similar to Arafel's but using the cmpsb instruction, and also assuming that both strings are null-terminated.
; Compare two zero-terminated byte strings with cmpsb.
; The current byte of string1 is kept in al because cmpsb advances esi
; past it; al is then used to detect the terminator after a successful match.
        mov     esi, offset string1
        mov     edi, offset string2
cmpsb_next_char:
        mov     al, [esi]               ; remember the byte about to be compared
        cmpsb                           ; [esi] vs [edi], then advance both pointers
        jnz     strings_not_equal       ; bytes differ
        test    al, al                  ; was the matched byte the terminator?
        jnz     cmpsb_next_char         ; no - keep comparing
strings_are_equal:
.....
strings_not_equal:
.....
If your strings are not null-terminated, you will have to load ECX with the size of one of the strings (the two strings cannot be equal if they don't have the same size) and use code such as follows.
; Compare two byte strings of known, identical length (no terminator needed).
; ecx = number of bytes to compare; esi/edi = the two strings.
; repz cmpsb stops at the first mismatch or when ecx reaches zero, and
; assumes the direction flag is clear so both pointers advance upward.
        mov     esi, offset string1
        mov     edi, offset string2
        mov     ecx, string_length
        cmp     ecx, ecx                ; force ZF=1: with ecx = 0 the repeat runs
                                        ; zero times and leaves the flags untouched,
                                        ; so two empty strings must read as "equal"
        repz    cmpsb
        jne     strings_not_equal
strings_are_equal:
.....
strings_not_equal:
.....
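(Note that you could also use this last code with null-terminated strings, provided you already know the length of one of them including its terminator: REPZ CMPSB stops only on a mismatch or when ECX reaches zero, not when it meets a null byte.)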
Raymond
Danesh,
This is from the masm library:
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
.486 ; force 32 bit code
.model flat, stdcall ; memory model & calling convention
option casemap :none ; case sensitive
szCmp PROTO :DWORD,:DWORD
.code
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
align 16
; ---------------------------------------------------------------
; szCmp - compare two zero-terminated byte strings.
;
; In:    str1, str2 = addresses of the strings (stdcall, two DWORDs)
; Out:   eax = length of the strings (terminator excluded) on a match,
;              0 when they differ.
;        Two empty strings match with length 0, so that case returns
;        the same value as a mismatch.
; Uses:  ecx, edx as base pointers, esi as the running index
;        (esi is saved and restored through the USES clause).
; ---------------------------------------------------------------
szCmp proc uses esi str1:DWORD, str2:DWORD
        mov     ecx, str1               ; ecx = base of first string
        mov     edx, str2               ; edx = base of second string
        xor     esi, esi                ; esi = index of the byte being compared
  next_byte:
        mov     al, [ecx+esi]
        cmp     al, [edx+esi]
        jne     mismatch
        add     esi, 1
        test    al, al                  ; matched byte was the terminator?
        jnz     next_byte
        lea     eax, [esi-1]            ; esi counted the terminator too
        ret
  mismatch:
        xor     eax, eax                ; zero = strings differ
        ret
szCmp endp
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
end
Works well for me.
Paul
Danesh,
Look on the forum for Donkey's string library. It's awesome.
Rgs, striker.
Danesh,
This program does a quick function test of all three examples, and measures the number of clock cycles that each takes to compare the sample strings. As you might expect, the simpler and/or easier to understand versions execute slower than the more complex and harder to understand version from the MASM32 library.
; ««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
.586 ; create 32 bit code
.model flat, stdcall ; 32 bit memory model
option casemap :none ; case sensitive
include \masm32\include\windows.inc
include \masm32\include\masm32.inc
include \masm32\include\kernel32.inc
includelib \masm32\lib\masm32.lib
includelib \masm32\lib\kernel32.lib
include \masm32\macros\macros.asm
include timers.asm
szCmp_arafel PROTO :DWORD,:DWORD
szCmp_raymond PROTO :DWORD,:DWORD
; ««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
.data
s1 db "my other brother darryl",0
s2 db "my other brother darrel",0
.code
; ««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
; Program entry point: a functional check of the three compare routines
; followed by a timing run of each one.
start:
; ««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
; Value handed to counter_begin for every timing run below
; (presumably the number of iterations timed - confirm in timers.asm).
LOOP_COUNT equ 10000000
; --- Functional check 1: each routine compares s1 with itself ---
; The value returned in eax is printed in hex, followed by CR/LF.
; szCmp_arafel and szCmp_raymond return -1 for equal strings;
; szCmp returns the string length on a match.
invoke szCmp_arafel, ADDR s1, ADDR s1
print uhex$(eax)
print chr$(13,10)
invoke szCmp_raymond, ADDR s1, ADDR s1
print uhex$(eax)
print chr$(13,10)
invoke szCmp, ADDR s1, ADDR s1
print uhex$(eax)
print chr$(13,10)
; --- Functional check 2: s1 against s2 ---
; s1 and s2 differ only near the end ("darryl" vs "darrel"), and all
; three routines return 0 for strings that differ.
invoke szCmp_arafel, ADDR s1, ADDR s2
print uhex$(eax)
print chr$(13,10)
invoke szCmp_raymond, ADDR s1, ADDR s2
print uhex$(eax)
print chr$(13,10)
invoke szCmp, ADDR s1, ADDR s2
print uhex$(eax)
print chr$(13,10)
; --- Timing runs ---
; Each call is bracketed by counter_begin / counter_end (presumably the
; macros supplied by timers.asm - confirm); the value left in eax after
; counter_end is printed, followed by " cycles".
counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
invoke szCmp_arafel, ADDR s1, ADDR s2
counter_end
print ustr$(eax)
print chr$(" cycles",13,10)
counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
invoke szCmp_raymond, ADDR s1, ADDR s2
counter_end
print ustr$(eax)
print chr$(" cycles",13,10)
counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
invoke szCmp, ADDR s1, ADDR s2
counter_end
print ustr$(eax)
print chr$(" cycles",13,10)
; Show the exit prompt and wait for input before terminating, so the
; results can be read.
mov eax, input(13,10,"Press enter to exit...")
exit
; ««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
; ---------------------------------------------------------------
; szCmp_arafel - compare two zero-terminated byte strings using
;                lodsb / scasb.
;
; In:    str1, str2 = addresses of the strings (stdcall, two DWORDs)
; Out:   eax = -1 if the strings are identical, 0 otherwise
; Saves: esi, edi - both are callee-saved under the Win32 stdcall
;        convention, so the USES clause pushes them on entry and pops
;        them before every ret.
; Note:  lodsb/scasb advance upward only while the direction flag is
;        clear, which the routine assumes on entry.
; ---------------------------------------------------------------
szCmp_arafel proc uses esi edi str1:DWORD, str2:DWORD
        mov     esi, str1
        mov     edi, str2
  next_char:
        lodsb                           ; al = next byte of str1, esi += 1
        test    al, al                  ; end of first string?
        jz      first_ended
        scasb                           ; al vs next byte of str2, edi += 1
        je      next_char
        jmp     strings_not_equal
  first_ended:
        cmp     byte ptr [edi], 0       ; second string must end here too
        jne     strings_not_equal
  strings_are_equal:
        mov     eax, -1
        ret
  strings_not_equal:
        xor     eax, eax
        ret
szCmp_arafel endp
; ---------------------------------------------------------------
; szCmp_raymond - compare two zero-terminated byte strings using
;                 cmpsb.
;
; In:    str1, str2 = addresses of the strings (stdcall, two DWORDs)
; Out:   eax = -1 if the strings are identical, 0 otherwise
; Saves: esi, edi - both are callee-saved under the Win32 stdcall
;        convention, so the USES clause pushes them on entry and pops
;        them before every ret.
; Note:  cmpsb advances upward only while the direction flag is clear,
;        which the routine assumes on entry.
; ---------------------------------------------------------------
szCmp_raymond proc uses esi edi str1:DWORD, str2:DWORD
        mov     esi, str1
        mov     edi, str2
  next_char:
        mov     al, [esi]               ; keep the byte: cmpsb moves esi past it
        cmpsb                           ; [esi] vs [edi], then advance both
        jne     strings_not_equal
        test    al, al                  ; matched byte was the terminator?
        jnz     next_char
  strings_are_equal:
        mov     eax, -1
        ret
  strings_not_equal:
        xor     eax, eax
        ret
szCmp_raymond endp
end start
Running on a P3:
FFFFFFFF
FFFFFFFF
00000017
00000000
00000000
00000000
247 cycles
146 cycles
90 cycles
EDIT:
Oops, TIMERS.ASM is available here:
http://www.masmforum.com/simple/index.php?topic=770.0
Hi, all
Thanks for your great solutions, as usual. Based on Michael's benchmark, it seems the string comparison routine in the MASM32 library is both more efficient and easy to use. That's cool.
Thanks anyway,
Danesh
What if one is db and the other dw? Will I have to stick to lstrcmp?
Convert the ASCII string to Unicode (for example with MultiByteToWideChar), then use lstrcmpW.