I was looking for a fast way to compare strings, and this hilarious MS blog (http://blogs.msdn.com/michkap/archive/2005/12/22/506595.aspx) recommended a fantastic brand new Vista function that is indeed "a wrapper around the RtlCompareUnicodeString". Since I still rarely need Unicode, I quickly found RtlCompareString. Great, but it crashes... any idea why?
Celeron M:
6 cycles for direct comp aka hand-coded
1573 cycles for lstrcmp
1569 cycles for lstrcmpi
; ---------------------------------------------------------------------
; String-compare benchmark (MASM32, 32-bit): measures a hand-coded byte
; loop, lstrcmp and lstrcmpi on two zero-terminated strings and prints
; one result line per method.
;
; With UseRTL = 1 two RtlCompareString measurements are assembled as
; well. They crash: according to the prototype quoted later in this
; thread, RtlCompareString takes PSTRING arguments (pointers to counted
; STRING structures), whereas the calls below pass the addresses of the
; raw zero-terminated text.
; ---------------------------------------------------------------------
.nolist
include \masm32\include\masm32rt.inc
uselib ntdll ; needed for RtlCompareXXX
.686
; counter_begin / counter_end are presumably supplied by this include
include \masm32\macros\timers.asm
; iteration count handed to every counter_begin below
LOOP_COUNT = 100000
UseRTL = 0 ; put 1 to see the crash
.data
; Test strings: identical in the first two characters ("Th"), different
; at the third ('i' vs 'a'), so every comparison can stop very early.
Src1 db "This is a string, 100 characters long, that serves for a variety of purposes, such as testing algos.", 0
Src2 db "That is a string, 101 characters long, that serves for a variety of purposes, such as testing algos..", 0
.code
start:
; --- 1) hand-coded comparison ---------------------------------------
counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
mov esi, offset Src1
mov edi, offset Src2
; ecx = -1 so that the first inc inside the loop yields index 0
or ecx, -1
.Repeat
inc ecx
mov al, [esi+ecx]
; stop at the first differing byte or at the terminator of Src1
.Until al!=[edi+ecx] || al==0
counter_end
; eax is printed as the measured cycle count after each counter_end
print str$(eax), 9, "cycles for direct comp", 13, 10
; --- 2) lstrcmp ------------------------------------------------------
counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
invoke lstrcmp, offset Src1, offset Src2
counter_end
print str$(eax), 9, "cycles for lstrcmp", 13, 10
; --- 3) lstrcmpi -----------------------------------------------------
counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
invoke lstrcmpi, offset Src1, offset Src2
counter_end
print str$(eax), 9, "cycles for lstrcmpi", 13, 10
; --- 4) RtlCompareString, only assembled when UseRTL is non-zero -----
; NOTE(review): these calls pass plain character pointers where the
; routine expects pointers to STRING descriptors; this is the crash
; being reported.
if UseRTL
; third argument 0 (CaseInSensitive in the quoted prototype)
counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
invoke RtlCompareString, offset Src1, offset Src2, 0
counter_end
print str$(eax), 9, "cycles for RtlCompareString 0", 13, 10
; third argument 1
counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
invoke RtlCompareString, offset Src1, offset Src2, 1
counter_end
print str$(eax), 9, "cycles for RtlCompareString 1", 13, 10
endif
; wait for a key press, then terminate the process
inkey chr$(13, 10, "--- ok ---", 13)
exit
end start
... and it crashes immediately in this highly optimised loop:
Address Hex dump Command Comments
7C96DFDA 8A06 mov al, [esi]
7C96DFDC 8A0F mov cl, [edi]
7C96DFDE 46 inc esi
7C96DFDF 47 inc edi
7C96DFE0 3AC1 cmp al, cl
7C96DFE2 75 06 jne short 7C96DFEA
7C96DFE4 3BF3 cmp esi, ebx
7C96DFE6 72 F2 jb short 7C96DFDA
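esi=edi=20736920h (these are the ASCII bytes " is " found at offset 4 of both test strings, i.e. string text is being used as a pointer)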
Quote: "The RtlCompareString routine compares two counted strings"
I think the key word here is 'counted' - different to zero-terminated.
The PSTRING type is a structure but MSDN won't show it to me ::)
// begin_ntndis
//
// Counted String
//
// Descriptor for a string that carries explicit lengths instead of
// relying on a zero terminator. The MIDL attributes below tie the two
// counters to Buffer: size_is(MaximumLength), length_is(Length).
//
typedef struct _STRING {
USHORT Length;          // used length of Buffer (presumably in bytes - confirm against the DDK docs)
USHORT MaximumLength;   // size of Buffer (presumably in bytes - confirm against the DDK docs)
#ifdef MIDL_PASS
[size_is(MaximumLength), length_is(Length) ]
#endif // MIDL_PASS
PCHAR Buffer;           // pointer to the characters themselves
} STRING;
typedef STRING *PSTRING;
// RtlCompareString takes two pointers to STRING descriptors (not raw
// character pointers) plus a flag named CaseInSensitive; it returns a LONG.
NTSYSAPI
LONG
NTAPI
RtlCompareString(
PSTRING String1,
PSTRING String2,
BOOLEAN CaseInSensitive
);
Edit: The source of the STRING structure was a fairly recent DDK, specifically C:\WINDDK\3790.1830\inc\w2k\ntdef.h, although this is only one of multiple possible paths.
Thanx, Michael :U
I got it working:
6 cycles for hand-coded Masm
1578 cycles for lstrcmp
1579 cycles for lstrcmpi
1222 cycles for CompareString
12 cycles for crt_strcmp
25 cycles for RtlCompareString 0
97 cycles for RtlCompareString 1
; ---------------------------------------------------------------------
; String-compare benchmark (MASM32, 32-bit Windows, Intel syntax).
;
; Measures and prints one line per method:
;   hand-coded byte loop, lstrcmp, lstrcmpi, CompareString, crt_strcmp
;   and (when UseRTL is non-zero) RtlCompareString, case-sensitive and
;   case-insensitive.
;
; RtlCompareString does not take zero-terminated strings: it takes
; pointers to counted STRING descriptors
;   { USHORT Length; USHORT MaximumLength; PCHAR Buffer; }
; so one descriptor per test string is built in .data (Src1a / Src2a).
;
; The string lengths are derived from the data itself, so the
; descriptors and the CompareString counts always match the real text
; (Src2 is one character longer than Src1).
; ---------------------------------------------------------------------
include \masm32\include\masm32rt.inc
uselib ntdll                            ; needed for RtlCompareXXX
.686
include \masm32\macros\timers.asm       ; presumably supplies counter_begin / counter_end

LOOP_COUNT            = 500000          ; iteration count handed to every counter_begin
UseRTL                = 1               ; 0 leaves out the RtlCompareString measurements
LOCALE_SYSTEM_DEFAULT = 800h

.data
; Test strings: equal in the first two characters, different at the
; third ('i' vs 'a'), so every comparison can stop very early.
Src1    db "This is a string, 100 characters long, that serves for a variety of purposes, such as testing algos.", 0
Src1Len = $ - Src1 - 1                  ; characters in Src1, terminator excluded
Src2    db "That is a string, 101 characters long, that serves for a variety of purposes, such as testing algos..", 0
Src2Len = $ - Src2 - 1                  ; characters in Src2, terminator excluded

; Counted STRING descriptors for RtlCompareString:
;   Length = characters used, MaximumLength = buffer size (text + 0).
        align 4
Src1a   dw Src1Len, Src1Len + 1
        dd Src1
Src2a   dw Src2Len, Src2Len + 1
        dd Src2

.code
start:
; --- 1) hand-coded comparison ---------------------------------------
        counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
        mov esi, offset Src1
        mov edi, offset Src2
        or ecx, -1                      ; index = -1, the first inc makes it 0
        .Repeat
                inc ecx
                mov al, [esi+ecx]
        .Until al!=[edi+ecx] || al==0   ; first differing byte, or end of Src1
        counter_end
        print str$(eax), 9, "cycles for hand-coded Masm", 13, 10

; --- 2) lstrcmp ------------------------------------------------------
        counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
        invoke lstrcmp, offset Src1, offset Src2
        counter_end
        print str$(eax), 9, "cycles for lstrcmp", 13, 10

; --- 3) lstrcmpi -----------------------------------------------------
        counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
        invoke lstrcmpi, offset Src1, offset Src2
        counter_end
        print str$(eax), 9, "cycles for lstrcmpi", 13, 10

; --- 4) CompareString, given the real character count of each string -
        counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
        invoke CompareString, LOCALE_SYSTEM_DEFAULT, SORT_STRINGSORT, offset Src1, Src1Len, offset Src2, Src2Len
        counter_end
        print str$(eax), 9, "cycles for CompareString", 13, 10

; --- 5) C runtime strcmp ---------------------------------------------
        counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
        invoke crt_strcmp, offset Src1, offset Src2
        counter_end
        print str$(eax), 9, "cycles for crt_strcmp", 13, 10

; --- 6) RtlCompareString on the STRING descriptors -------------------
if UseRTL
        ; third argument 0 = case-sensitive comparison
        counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
        invoke RtlCompareString, offset Src1a, offset Src2a, 0
        counter_end
        print str$(eax), 9, "cycles for RtlCompareString 0", 13, 10

        ; third argument 1 = case-insensitive comparison
        counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
        invoke RtlCompareString, offset Src1a, offset Src2a, 1
        counter_end
        print str$(eax), 9, "cycles for RtlCompareString 1", 13, 10
endif

; wait for a key press, then terminate the process
        inkey chr$(13, 10, "--- ok ---", 13)
        exit
end start