The MASM Forum Archive 2004 to 2012

General Forums => The Laboratory => Topic started by: johnsa on October 01, 2008, 11:12:27 AM

Title: Follow on from Levenshtein - Soundex
Post by: johnsa on October 01, 2008, 11:12:27 AM
Following on from the Levenshtein function I gave earlier, here is another little proc to generate Soundex codes for a string. Use it, break it, test it; it all seems to work fine for me and it is pretty quick.



create_soundex PROTO stringAddr:DWORD, keyAddr:DWORD

.data

; sxkeys - Soundex digit (1-6) for every possible byte value, indexed by
; the character code itself. 0 means "carries no Soundex code": vowels,
; H, W, Y and every non-letter. Upper- and lower-case letters share the
; same codes. The table spans all 256 byte values so that any character,
; including those above 7Fh, is a valid index.
align 16
sxkeys db  64 dup (0)                          ; 00h-3Fh: no letters
   ;   @ A B C D E F G H I J K L M N O
   db  0,0,1,2,3,0,1,2,0,0,2,2,4,5,5,0        ; 40h-4Fh
   ;   P Q R S T U V W X Y Z [ \ ] ^ _
   db  1,2,6,2,3,0,1,0,2,0,2,0,0,0,0,0        ; 50h-5Fh
   ;   ` a b c d e f g h i j k l m n o
   db  0,0,1,2,3,0,1,2,0,0,2,2,4,5,5,0        ; 60h-6Fh
   ;   p q r s t u v w x y z { | } ~ DEL
   db  1,2,6,2,3,0,1,0,2,0,2,0,0,0,0,0        ; 70h-7Fh
   db  128 dup (0)                             ; 80h-FFh: no letters

.code

;-----------------------------------------------------------------------
; create_soundex - write the 4-character Soundex key of a string.
;
; In:    stringAddr -> zero-terminated string of single-byte characters
;        keyAddr    -> output buffer, at least 4 bytes
; Out:   [keyAddr]  =  upper-case first letter followed by three digits,
;                      padded with '0'; or "0000" when the first
;                      character is not A-Z / a-z (empty string included).
;                      Exactly 4 bytes are written, no terminator.
;        No return value is defined.
; Clobb: eax, ecx, edx, flags. ebx, esi and edi are preserved (USES).
;
; Rules applied to the characters after the first:
;   - a character whose code is 0 is never written to the key;
;   - a code equal to the previous code is skipped, and the "previous
;     code" starts out as the code of the first letter (Pfister -> P236);
;   - a vowel (A E I O U Y) clears the previous code, so the same code
;     may follow again (Tymczak -> T522);
;   - H, W and non-letters leave the previous code alone (Ashcraft -> A261).
;-----------------------------------------------------------------------
align 16
create_soundex PROC USES ebx esi edi stringAddr:DWORD, keyAddr:DWORD

    mov     esi, stringAddr
    mov     edi, keyAddr

    ; First character: fold to upper case, then insist on a letter.
    movzx   eax, byte ptr [esi]
    .if al >= 'a' && al <= 'z'
        sub     al, 20h
    .endif
    .if al < 'A' || al > 'Z'
        jmp     nosoundex
    .endif
    mov     [edi], al
    inc     esi
    inc     edi

    movzx   ebx, byte ptr sxkeys[eax]   ; bl = previous code, seeded from first letter
    mov     edx, 1                      ; edx = key bytes written so far (1..4)

sxloop:
    movzx   eax, byte ptr [esi]         ; eax = current character, upper bits clear
    inc     esi
    test    al, al
    jz      sxpadkey                    ; end of string: pad what is missing
    cmp     al, 7Fh
    ja      sxloop                      ; bytes above 7Fh are never letters; skip them
                                        ; so the lookup stays in the first 128 entries
    mov     cl, al                      ; keep the character for the vowel test
    mov     al, sxkeys[eax]
    test    al, al
    jz      sxuncoded
    cmp     al, bl
    je      sxloop                      ; same code as the previous one: collapse

    mov     bl, al
    add     al, '0'                     ; code 1..6 -> ASCII digit
    mov     [edi], al
    inc     edi
    inc     edx
    cmp     edx, 4
    jne     sxloop
    jmp     soundexdone                 ; key is full

sxuncoded:
    ; Code-0 character. Only a vowel breaks a run of equal codes.
    and     cl, 0DFh                    ; fold case; only letters land in 'A'..'Z'
    cmp     cl, 'H'
    je      sxloop
    cmp     cl, 'W'
    je      sxloop
    sub     cl, 'A'
    cmp     cl, 'Z' - 'A'
    ja      sxloop                      ; not a letter: previous code untouched
    xor     ebx, ebx                    ; vowel: forget the previous code
    jmp     sxloop

sxpadkey:
    ; edx is 1..3 here, so at least one '0' is written.
    mov     ecx, 4
    sub     ecx, edx
    mov     al, '0'
sxpad:
    mov     [edi], al
    inc     edi
    dec     ecx
    jnz     sxpad

soundexdone:
    ret

nosoundex:
    mov     dword ptr [edi], '0000'     ; all four bytes are '0', order irrelevant
    ret
create_soundex ENDP

Title: Re: Follow on from Levenshtein - Soundex
Post by: dsouza123 on October 04, 2008, 03:00:48 PM
It appears well optimized; I don't see any further opportunities to optimize it.
Title: Re: Follow on from Levenshtein - Soundex
Post by: DoomyD on October 04, 2008, 10:32:48 PM
Try the following for the first character filter:
      ; First-character filter: jump to nosoundex unless [esi] is an ASCII
      ; letter; otherwise store it, folded to upper case, at [edi].
      mov   al,[esi]
      test   al,11000000b
      jz      nosoundex ;bits 7 and 6 both clear: 00h-3Fh
      js      nosoundex ;bit 7 set: 80h and above
      and   al,00011111b ;only 40h-7Fh gets here; keep low 5 bits (00h-1Fh), which drops the case bit
      jz      nosoundex;was 40h '@' or 60h '`'
      cmp   al,1Ah
      jg      nosoundex;was 5Bh-5Fh or 7Bh-7Fh (past 'Z' / 'z')
      or      al,01000000b;1..1Ah -> upper-case letter 41h-5Ah
      mov   [edi],al