Hello there,
Well, basically I need to split a string by a certain delimiter that is 3 characters long. For example:
|#|Hello|#|World|#|Bye|#|World|#|
So when split.
Byte array one would contain 'Hello'
Byte array two would contain 'World'
Byte array three would contain 'Bye', and so on.
I have no problem doing this in Delphi but as I am working on a project in MASM I am hoping to have it in MASM.
Does anyone have an example of this anywhere or something?
Thanks In Advance :)
include \masm32\include\masm32rt.inc

; Demo: split TheSrc in place on the 3-character delimiter "|#|" and print
; every piece that is followed by a delimiter, one per line.
;
; Each delimiter's first byte is overwritten with 0 so the piece before it
; becomes a zero-terminated string; TheArray collects the start address of
; each such piece and is ended by a NULL entry.
; Because TheSrc begins with a delimiter, the first recorded piece is empty.
; Text after the last delimiter is not recorded.

DELIM_LEN equ 3                         ; length of "|#|"

.data?
TheArray db 1000 dup(?)                 ; dword pointer table (room for 250 entries)
.data
TheSrc db "|#|Hello|#|World|#|Bye|#|World|#|", 0
LAST_START equ (sizeof TheSrc) - DELIM_LEN  ; highest offset where a whole delimiter still fits
.code
start:  mov esi, offset TheSrc          ; esi = base of the text being scanned
        mov edi, offset TheArray        ; edi = next free slot in the pointer table
        xor ecx, ecx                    ; ecx = current scan offset
        mov eax, esi                    ; eax = start of the piece being scanned
        .Repeat
            ; MASM encodes the two-character constant "#|" with its first
            ; character in the high byte, so the little-endian word in memory
            ; that equals it is the byte pair "|#".
            .if word ptr [esi+ecx]=="#|" && byte ptr [esi+ecx+2]=="|"
                mov byte ptr [esi+ecx], 0       ; terminate the piece that ends here
                stosd                           ; record where that piece started
                lea eax, [esi+ecx+DELIM_LEN]    ; the next piece starts after the delimiter
                add ecx, DELIM_LEN-1            ; with the inc below: resume after the delimiter,
                                                ; so its last '|' cannot start another match
            .endif
            inc ecx
        .Until ecx>LAST_START           ; never read past the end of TheSrc
        xor eax, eax
        stosd                           ; explicit NULL entry ends the table

        mov esi, offset TheArray
        .While 1
            lodsd                       ; eax = next recorded piece
            .Break .if !eax
            print eax, 13, 10
        .Endw
        inkey "That was easy, right?"
        exit
end start
This looks confusing and I don't quite understand it. Is there a simpler way to do this? Hopefully in a procedure so I can use it multiple times throughout my program.
a bit more dynamically (quick'n dirty ;-)):
Quote:
include masm32rt.inc
.code
; strLenX - find the first "|#|" delimiter in a zero-terminated string.
;
; In:   pStr = address of the zero-terminated text to scan
; Out:  edx = 1 and eax = offset of the delimiter's first '|' when one is found
;       edx = 0 and eax = length of the string when there is none
; Uses: ecx as the match state (0 = nothing matched, 1 = "|" seen,
;       3 = "|#" seen); ebx is preserved through the USES clause.
;
; A '|' that interrupts a partial match starts a new candidate at that
; position, so input such as "||#|" reports the delimiter at offset 1.
strLenX proc uses ebx pStr:ptr CHAR
    mov ebx,pStr
    xor eax,eax                         ; eax = offset of the character being examined
    xor ecx,ecx                         ; nothing matched yet
    .while CHAR ptr [ebx+eax]
        .if CHAR ptr [ebx+eax] == '|' && ecx == 3
            mov eax,edx                 ; whole delimiter seen: return where it began
            mov edx,1
            ret
        .elseif CHAR ptr [ebx+eax] == '#' && ecx == 1
            mov ecx,3                   ; "|#" seen
        .elseif CHAR ptr [ebx+eax] == '|'
            mov edx,eax                 ; remember where this candidate begins
            mov ecx,1                   ; "|" seen (fresh start or restart)
        .else
            xor ecx,ecx                 ; mismatch: drop the partial match
        .endif
        inc eax
    .endw
    xor edx,edx                         ; no delimiter; eax already holds the length
    ret
strLenX endp
; parse - split a zero-terminated string on "|#|" into separately allocated copies.
;
; In:   psz = address of the text to split (the text itself is not modified)
; Out:  eax = address of a table of PCHAR entries, one per non-empty piece
;       that is followed by a delimiter, ended by a NULL entry.
;
; Empty pieces (adjacent delimiters) are skipped, and text after the last
; delimiter is not returned. The table and every piece are obtained with the
; alloc macro; allocation failure is not checked.
; NOTE(review): releasing the memory presumably needs the counterpart of
; alloc - confirm against the macro library in use.
parse proc uses ebx edi esi psz:PCHAR
    LOCAL ppChar:ptr CHAR

    ; Pass 1: count the non-empty pieces so the table can be sized.
    mov ebx,psz
    xor esi,esi                         ; esi = number of pieces found
    invoke strLenX,ebx
    .while edx                          ; edx != 0: a delimiter follows at offset eax
        .if eax
            add ebx,eax
            inc esi
        .endif
        add ebx,3                       ; step over the 3-character delimiter
        invoke strLenX,ebx
    .endw

    mov esi,alloc(ADDR [esi*4+4])       ; one dword per piece plus the NULL entry
    mov ppChar,esi                      ; esi now walks the table

    ; Pass 2: copy every non-empty piece into its own buffer.
    mov ebx,psz
    invoke strLenX,ebx
    .while edx
        .if eax
            push eax                    ; piece length
            push ebx                    ; piece start
            add ebx,eax
            inc eax                     ; reserve room for the terminating zero as well
            mov edi,alloc(eax)
            mov PCHAR ptr [esi],edi
            add esi,4
            pop edx                     ; edx = piece start
            pop ecx                     ; ecx = piece length
            .while ecx
                movzx eax,CHAR ptr [edx]
                mov CHAR ptr [edi],al
                inc edx
                inc edi
                dec ecx
            .endw
            mov CHAR ptr [edi],0        ; terminate the copy
        .endif
        add ebx,3
        invoke strLenX,ebx
    .endw
    mov PCHAR ptr [esi],0               ; explicit NULL entry ends the table
    mov eax,ppChar
    ret
parse endp
; main - demo driver: splits a sample string with parse and prints each
; returned piece as "<index>: <text>", then waits for a key and exits.
main proc
.data
string db "|#|Hello|#|World|#|Bye|#|World|#|",0
.code
    invoke parse,ADDR string
    mov esi,eax                         ; esi = base of the returned pointer table
    xor edi,edi                         ; edi = index of the entry being printed
    .while 1
        .break .if PCHAR ptr [esi+edi*4] == 0   ; a zero entry ends the table
        print str$(edi)
        print ": "
        print PCHAR ptr [esi+edi*4],13,10
        inc edi
    .endw
    inkey
    exit
main endp
end main
Quote from: DeadlyVermilion on May 30, 2011, 07:47:58 PM
This looks confusing and I don't quite understand it.
See \masm32\help\opcodes.chm for lods and stos.
Quote: Is there a simpler way to do this?
No. I showed you the easiest one.
If you want high-level language simplicity, try the CRT.
;==============================================================================
; Demo: tokenise str1 with the C runtime's strtok and print one token per line.
; strtok is given the characters of "|#|" as its delimiter argument and works
; on str1 in place; passing NULL on later calls continues the same scan.
;==============================================================================
    include \masm32\include\masm32rt.inc
;==============================================================================
    .data
      str1   db "|#|Hello|#|World|#|Bye|#|World|#|",0
      delims db "|#|",0
    .code
;==============================================================================
start:
;==============================================================================
    invoke crt_strtok, ADDR str1, ADDR delims
    .WHILE eax != NULL                  ; eax = current token, NULL when none are left
        print eax,13,10
        invoke crt_strtok, NULL, ADDR delims
    .ENDW
    inkey "Press any key to exit..."
    exit
;==============================================================================
end start
The compiler-generated assembly code for strtok is ~120 lines, so for assembly code the versions that qWord and jj2007 supplied are relatively short.
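strtok works on character delimiters, not string delimiters: it treats the delimiter argument as a set of single characters, so "|#|" splits on any '|' or '#'. (strtok also requires a writable string, as it overwrites delimiter characters with nulls.)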
strstr can be used for strings.
Here's a function that I might add to my library..
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Function: StringSplit
;;
;; Splits a string to an array of substrings according to the separator
;; parameter.
;;
;; Parameters:
;;
;; lpString - source string (not modified)
;; lpDelimiter - delimiter string
;; lpdwArrayElements - pointer to a dword variable that will receive array
;; length
;;
;; Returns:
;;
;; Array of strings.
;; Array and its elements are allocated with <calloc>.
;; Array length is stored in variable pointed by lpdwArrayElements
;;
;; Remarks:
;;
;; Array and its elements must be freed with "free" function
;;
;; The array always holds (number of delimiters + 1) elements, so leading,
;; trailing and adjacent delimiters produce empty strings.
;;
;; An empty delimiter never splits: the result is a single element holding
;; a copy of the whole source string.
;;
;; Allocation failures are not checked.
;;
StringSplit proc uses esi edi ebx lpString:LPCSTR, lpDelimiter:LPCSTR, lpdwArrayElements:DWORD
    LOCAL p, pArray, dwWordLen

    invoke strlen,lpDelimiter
    mov edi,eax                         ; edi = delimiter length, kept across the calls below

    ; Pass 1: count the delimiters to size the array.
    mov esi,lpString
    xor ebx,ebx;count
    .repeat
        .break .if edi == 0             ; strstr would match an empty delimiter forever
        invoke strstr,esi,lpDelimiter
        .break .if eax == NULL
        inc ebx
        lea esi,[eax+edi]               ; continue after this delimiter
    .until FALSE

    mov eax,lpdwArrayElements
    lea ecx,[ebx+1]; delims + 1 == elements
    mov [eax],ecx
    invoke calloc,ecx,sizeof LPCSTR
    mov pArray,eax                      ; pArray walks the array while it is filled
    mov p,eax                           ; p keeps the array base for the return value

    ; Pass 2: copy the text in front of every delimiter.
    mov esi,lpString
    .repeat
        .break .if edi == 0
        invoke strstr,esi,lpDelimiter
        .break .if eax == NULL
        mov ebx,eax                     ; ebx = address of the delimiter
        mov edx,eax
        sub edx,esi
        mov dwWordLen,edx               ; length of the piece in front of it
        inc edx                         ; plus one byte for the terminator
        invoke calloc,edx,sizeof sbyte; allocates memory initialised to 0
        mov edx,pArray
        mov [edx],eax
        add edx,sizeof LPCSTR
        mov pArray,edx
        invoke memcpy,eax,esi,dwWordLen
        lea esi,[ebx+edi]               ; continue after this delimiter
    .until FALSE

    ; Whatever follows the last delimiter is the final element.
    invoke strlen,esi
    mov ebx,eax
    inc eax
    invoke calloc,eax,sizeof sbyte; allocates memory initialised to 0
    mov edx,pArray
    mov [edx],eax
    add edx,sizeof LPCSTR
    mov pArray,edx
    invoke memcpy,eax,esi,ebx
    mov eax,p
    ret
StringSplit endp
Test code:
; Usage example for StringSplit: split a sample string, print the element
; count and every element in brackets, then release all the memory.
; This is a fragment of a procedure body; the enclosing proc is not shown.
; NOTE(review): T(...) and CRLF are not defined in the visible text -
; presumably a string-literal macro and a line-break constant; confirm.
local pArray, Elems
invoke StringSplit,T("|#|Hello|#|World|#|Bye|#|World|#|"),T("|#|"),addr Elems
; eax = array returned by StringSplit; Elems now holds the element count
mov pArray,eax
invoke printf,T("%u Elements",CRLF,CRLF),Elems
; esi walks the array; Elems is counted down to zero as elements are consumed
mov esi,pArray
.while Elems
invoke printf,T('[%s]',CRLF),LPCSTR ptr [esi]
; each element is released right after it has been printed
invoke free,[esi]
add esi,sizeof LPCSTR
dec Elems
.endw
; finally release the array itself
invoke free,pArray
Output:
6 Elements
[]
[Hello]
[World]
[Bye]
[World]
[]