From a file I load allocated memory with a list of zero terminated words and a jump table.
To search a certain word in the list I have following code (a subfunction). The start address of the word I am searching is in esi.
mov ebx,pJmpTable
mov edi,dword ptr [ebx]
@next:
push esi
@@:
cmp byte ptr [edi],0 ;end of a word
je @found
cmpsb
je @B
add ebx,4
cmp dword ptr [ebx],0 ;end of jump table
je @notfound
mov edi,dword ptr [ebx]
pop esi
jmp @next
@found:
add esp,4
mov eax,TRUE
retn
@notfound:
add esp,4
mov eax,FALSE
retn
Any suggestions for improvements are greatly appreciated.
How big is your word list ?
15 to 20.000 words
Sort your word list, index the start of each word, then use a binary search - you'll find any word within 15 string comparisons (most of which will early-out because their first characters don't match.)
if the list is dynamically changing, it is maybe better to use a binary search tree.
Thank you for your answers.
I guess this is a binary search. This reduces speed to one fifth. The list is static during a search.
EDIT: sorry the speed is decresed to 8% not 20%.
Any suggestions for improvements are greatly appreciated.
mov wSize,ecx ;size of word
mov offs,0
mov ebx,pJmpTable
xor ecx,ecx
mov eax,noWords ;number of words in jump table
inc cl
shr eax,cl
add eax,eax
add eax,eax
add ebx,eax
mov edi,dword ptr [ebx] ;word from jump table
xor eax,eax
@@:
mov eax,offs ;char offset of word to search
mov al,byte ptr [esi + eax]
mov dl,byte ptr [edi]
sub al,dl
cmp al,0
jne @checkchar
inc offs
mov eax,wSize
cmp offs,eax
je @checkresult
inc edi
jmp @B
@checkchar:
mov offs,0
add sbyte ptr al,dl
.if al>='a' && al<='z'
sub al,20h
.endif
.if dl>='a' && dl<='z'
sub dl,20h
.endif
cmp al,dl
jl @below
mov eax,noWords
inc cl
shr eax,cl
add eax,eax
add eax,eax
cmp eax,0
je @notfound
add ebx,eax
mov edi,dword ptr [ebx]
jmp @B
@below:
mov eax,noWords
inc cl
shr eax,cl
add eax,eax
add eax,eax
cmp eax,0
je @notfound
sub ebx,eax
mov edi,dword ptr [ebx]
jmp @B
@checkresult:
add esi,offs
cmp byte ptr [esi],20h ;acceptible word separator
je @F
cmp byte ptr [esi],0
je @F
@notfound:
mov eax,FALSE
retn
@@:
mov eax,TRUE
retn
Your code could probably be cleaned up and optimized a bit, but I don't think it would give any significant improvement.
From what it looks like you're doing, it might be better to use a hash table.
Although, note that while a non-matching hash indicates the string definitely isn't there, a matching hash only tells you the string is probably there - you still need to check that it is the same string to be certain. But the hash table could index into the list, so checking wouldn't be a full search.
Thank you. I will clean up a bit. A hash table. I have seen code using it but I don't know how to use it. I will read about it. I am satisfied with the solution of the search problem.
For those interessted. I discovered that the search does not always converge properly.
To solve that you have to complete acordingly. I have also change to a search function
where pJumpTbl is a pointer to jump table, cWords is number of pointers, pSearchedWord
is pointer to word to search and wSize is number och characters in search word.
SearchInList proc pJumpTbl:dword,cWords:dword,pSearchedWord:dword,wSize:dword
LOCAL offs:dword
LOCAL count:dword
push edi
push esi
push ebx
;word to search in word list
mov esi,pSearchedWord
mov offs,0
mov ebx,pJumpTbl
xor ecx,ecx
mov eax,cWords
shr eax,1
mov count,eax
add eax,eax
add eax,eax
add ebx,eax
;word item in word list
mov edi,dword ptr [ebx]
@@:
mov eax,offs
mov al,byte ptr [esi + eax] ;char in search word
mov dl,byte ptr [edi] ;char in list word
cmp al,dl
jne @checkchar
inc offs
mov eax,wSize
cmp offs,eax
je @checkresult
inc edi
jmp @B
@checkchar:
.if al>='a' && al<='z'
sub al,20h
.endif
.if dl>='a' && dl<='z'
sub dl,20h
.endif
cmp al,dl
jg @above
jl @below
inc offs
mov eax,offs
cmp eax,wSize
je @notfound
inc edi
mov dl,byte ptr [edi]
mov al,byte ptr [esi + eax]
jmp @checkchar
@above:
mov offs,0
mov eax,count ;calculate number of words above
sub cWords,eax
mov eax,cWords
shr eax,1
mov count,eax
add eax,eax
add eax,eax
cmp eax,0
je @notfound
add ebx,eax
mov edi,dword ptr [ebx]
jmp @B
@below:
mov offs,0
mov eax,count ;calculate number of words below
mov cWords,eax
shr eax,1
mov count,eax
add eax,eax
add eax,eax
cmp eax,0
je @notfound
sub ebx,eax
mov edi,dword ptr [ebx]
jmp @B
@checkresult:
add esi,offs
.if byte ptr [esi]==20h && byte ptr [edi + 1]==0
je @found
.elseif byte ptr [esi]==0 && byte ptr [edi + 1]==0
je @found
.endif
@notfound:
xor eax,eax
pop ebx
pop esi
pop edi
ret
@found:
mov eax,offs
pop ebx
pop esi
pop edi
ret
SearchInList endp