The MASM Forum Archive 2004 to 2012

General Forums => The Campus => Topic started by: DeadlyVermilion on May 30, 2011, 06:48:17 PM

Title: Split String Into Array Via Delimiter
Post by: DeadlyVermilion on May 30, 2011, 06:48:17 PM
Hello there,

Well basically I need to split a string by a certain delimiter that is 3 characters long. For example.

|#|Hello|#|World|#|Bye|#|World|#|

So when split.
Byte array one would contain 'Hello'
Byte array two would contain 'World'
Byte array three would contain 'Bye', and so on.

I have no problem doing this in Delphi but as I am working on a project in MASM I am hoping to have it in MASM.

Does anyone have an example of this anywhere or something?

Thanks In Advance :)
Title: Re: Split String Into Array Via Delimiter
Post by: jj2007 on May 30, 2011, 07:37:47 PM
include \masm32\include\masm32rt.inc

; Splits TheSrc in place on the fixed three-byte delimiter "|#|".
; Every delimiter's first byte is overwritten with 0 so the text before it
; becomes a zero-terminated string, and a pointer to that string is appended
; to TheArray. The tokens are then printed one per line.
; Note: the leading delimiter produces an empty first token, and any text
; after the last delimiter is not stored.

DELIM_LEN equ 3				; length of "|#|"

.data?
TheArray db 1000 dup(?)			; dword token pointers; the print loop stops
					; at the first zero dword, so this relies on
					; the .data? section reading as zero

.data
TheSrc db "|#|Hello|#|World|#|Bye|#|World|#|", 0
SCAN_END equ sizeof TheSrc - DELIM_LEN	; first index at which a full 3-byte
					; compare would run past the buffer

.code
start:	mov esi, offset TheSrc		; esi = base of the source buffer
	mov edi, offset TheArray	; edi = next free pointer slot (stosd advances it)
	xor ecx, ecx			; ecx = scan index into TheSrc
	mov eax, esi			; eax = start of the token being collected
	.Repeat
		; MASM packs a two-character literal with its first character in
		; the high byte, so "#|" matches the bytes '|','#' in memory.
		.if word ptr [esi+ecx]=="#|" && byte ptr [esi+ecx+2]=="|"
			mov byte ptr [esi+ecx], 0	; terminate the token before the delimiter
			stosd				; record the token's start address
			lea eax, [esi+ecx+DELIM_LEN]	; next token begins after the delimiter
		.endif
		inc ecx
	.Until ecx>=SCAN_END		; stop while [esi+ecx+2] is still inside TheSrc

	mov esi, offset TheArray	; walk the stored pointers
	.While 1
		lodsd
		.Break .if !eax		; zero dword = end of list
		print eax, 13, 10
	.Endw
	inkey "That was easy, right?"
	exit

end start
Title: Re: Split String Into Array Via Delimiter
Post by: DeadlyVermilion on May 30, 2011, 07:47:58 PM
This looks confusing and I don't quite understand it. Is there a simpler way to do this? Hopefully in a procedure so I can use it multiple times throughout my program.
Title: Re: Split String Into Array Via Delimiter
Post by: qWord on May 30, 2011, 08:03:19 PM
a bit more dynamically (quick'n dirty ;-)):
Quote:
include masm32rt.inc
.code
;------------------------------------------------------------------------------
; strLenX - find the first "|#|" delimiter in a zero-terminated string.
;
; In:    pStr = address of the string to scan
; Out:   edx  = 1 if a delimiter was found, 0 if the end of string was reached
;        eax  = number of bytes in front of the delimiter (edx = 1), or the
;               length of the whole string (edx = 0)
; Clobb: ecx, flags (ebx is preserved by the "uses" clause)
;
; ecx is a small match state: 0 = nothing matched, 1 = "|" matched,
; 3 = "|#" matched. edx remembers the index at which the current candidate
; delimiter started.
;------------------------------------------------------------------------------
strLenX proc uses ebx pStr:ptr CHAR

   mov ebx,pStr
   xor eax,eax                          ; eax = scan index
   xor ecx,ecx                          ; ecx = match state
   .while CHAR ptr [ebx+eax]
      .if CHAR ptr [ebx+eax] == '|' && ecx == 0
         mov edx,eax                    ; candidate delimiter starts here
         or ecx,1
      .elseif CHAR ptr [ebx+eax] == '#' && ecx == 1
         or ecx,2                       ; state 3: "|#" seen
      .elseif CHAR ptr [ebx+eax] == '|' && ecx == 3
         mov eax,edx                    ; length of the text before the delimiter
         mov edx,1
         ret
      .elseif CHAR ptr [ebx+eax] == '|'
         ; Only reachable in state 1 ("|" followed by another "|"): the new
         ; bar may itself open a delimiter, so restart the candidate here
         ; instead of dropping back to state 0 and missing e.g. "||#|".
         mov edx,eax
      .else
         xor ecx,ecx                    ; mismatch: start over
      .endif
      inc eax
   .endw
   xor edx,edx                          ; no delimiter; eax = string length
   ret

strLenX endp

;------------------------------------------------------------------------------
; parse - split a zero-terminated string on the "|#|" delimiter.
;
; In:    psz = address of the source string (not modified)
; Out:   eax = address of an array of string pointers, ended by a NULL entry.
;              The array and every string are obtained through the alloc
;              macro; releasing them is left to the caller.
;
; Only non-empty pieces that are followed by a delimiter are returned: empty
; pieces are skipped and text after the last delimiter is ignored.
; Allocation failures are not checked.
;
; Two passes over the string, both driven by strLenX (eax = piece length,
; edx = delimiter found): the first counts the pieces, the second copies them.
;------------------------------------------------------------------------------
parse proc uses ebx edi esi psz:PCHAR
LOCAL ppChar:ptr CHAR

   ; ---- pass 1: count the non-empty pieces in esi ----
   mov ebx,psz
   xor esi,esi
   invoke strLenX,ebx
   .while edx
      .if eax
         add ebx,eax
         inc esi
      .endif
      add ebx,3                         ; step over the delimiter
      invoke strLenX,ebx
   .endw

   ; ---- allocate (count + 1) pointer slots; the extra one is the terminator ----
   mov esi,alloc(ADDR [esi*4+4])
   mov ppChar,esi

   ; ---- pass 2: copy every non-empty piece into its own buffer ----
   mov ebx,psz
   invoke strLenX,ebx
   .while edx
      .if eax
         push eax                       ; piece length
         push ebx                       ; piece start
         add ebx,eax
         inc eax                        ; one extra byte for the terminating zero
         mov edi,alloc(eax)
         mov PCHAR ptr [esi],edi
         add esi,4
         pop edx                        ; edx = source
         pop ecx                        ; ecx = bytes to copy
         .while ecx
            movzx eax,CHAR ptr [edx]
            mov CHAR ptr [edi],al
            inc edx
            inc edi
            dec ecx
         .endw
         mov CHAR ptr [edi],0           ; now inside the allocation
      .endif
      add ebx,3                         ; step over the delimiter
      invoke strLenX,ebx
   .endw
   mov dword ptr [esi],0                ; explicit NULL terminator for the array
   mov eax,ppChar
   ret
parse endp

; main - demo driver: splits the sample string with parse and prints every
; returned piece as "<index>:   <text>" on its own line.
main proc

   .data
       string db "|#|Hello|#|World|#|Bye|#|World|#|",0
   .code
   invoke parse,ADDR string
   mov esi,eax                          ; esi -> current pointer slot
   xor edi,edi                          ; edi = running index for the label
   .while 1
      .break .if PCHAR ptr [esi] == 0   ; NULL slot ends the list
      print str$(edi)
      print ":   "
      print PCHAR ptr [esi],13,10
      add esi,4                         ; next slot
      inc edi
   .endw

   inkey
   exit

main endp
end main
Title: Re: Split String Into Array Via Delimiter
Post by: jj2007 on May 30, 2011, 08:14:11 PM
Quote from: DeadlyVermilion on May 30, 2011, 07:47:58 PM
This looks confusing and I don't quite understand it.

See \masm32\help\opcodes.chm for lods and stos.

Quote: Is there a simpler way to do this?

No. I showed you the easiest one.
Title: Re: Split String Into Array Via Delimiter
Post by: MichaelW on May 30, 2011, 08:59:27 PM
If you want high-level language simplicity, try the CRT.

;==============================================================================
    include \masm32\include\masm32rt.inc
;==============================================================================
    .data
        str1 db "|#|Hello|#|World|#|Bye|#|World|#|",0
    .code
;==============================================================================
start:
;==============================================================================
    ; Tokenise str1 with the CRT strtok and print each token on its own line.
    ; esi holds strtok's first argument: the buffer on the first call, NULL on
    ; every later call so that strtok continues where it stopped.
    mov esi, OFFSET str1
    .WHILE 1
        invoke crt_strtok, esi, chr$("|#|")
        .BREAK .IF eax == NULL          ; no more tokens
        print eax,13,10
        xor esi, esi                    ; continue the same scan next time
    .ENDW
    inkey "Press any key to exit..."
    exit
;==============================================================================
end start


The compiler-generated assembly code for strtok is ~120 lines, so for assembly code the versions that qWord and jj2007 supplied are relatively short.
Title: Re: Split String Into Array Via Delimiter
Post by: drizz on May 30, 2011, 10:37:55 PM
strtok treats its delimiter argument as a set of single characters, not as one delimiter string... (strtok also requires a writable string, as it overwrites delimiter characters with nulls)
strstr can be used for strings.

Here's a function that I might add to my library..


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Function: StringSplit
;;
;; Splits a string to an array of substrings according to the separator
;; parameter.
;;
;; Parameters:
;;
;; lpString - source string (not modified)
;; lpDelimiter - delimiter string
;; lpdwArrayElements - pointer to a dword variable that will receive array
;; length
;;
;; Returns:
;;
;; Array of strings.
;; Array and its elements are allocated with <calloc>.
;; Array length is stored in variable pointed by lpdwArrayElements
;;
;; Remarks:
;;
;; Array and its elements must be freed with "free" function
;;
;; The element count is always (number of delimiters + 1), so leading,
;; trailing and adjacent delimiters produce empty strings.
;;
;; An empty delimiter string is treated as "no delimiter": the result is a
;; single element holding a copy of the whole source string.
;;
;; Allocation failures are not checked.
;;
StringSplit proc uses esi edi ebx lpString:LPCSTR, lpDelimiter:LPCSTR, lpdwArrayElements:DWORD

LOCAL p, pArray, dwWordLen

	invoke strlen,lpDelimiter
	mov edi,eax			; edi = delimiter length

	; ---- pass 1: count the delimiters ----
	mov esi,lpString
	xor ebx,ebx;count
	.repeat
		; strstr with an empty needle returns its haystack argument, and
		; esi would then never advance - leave the loop instead
		.break .if edi == 0
		invoke strstr,esi,lpDelimiter
		.break .if eax == NULL
		inc ebx
		lea esi,[eax+edi]	; resume the search after this delimiter
	.until FALSE

	mov eax,lpdwArrayElements
	lea ecx,[ebx+1]; delims + 1 == elements
	mov [eax],ecx

	invoke calloc,ecx,sizeof LPCSTR
	mov pArray,eax			; pArray = next slot to fill
	mov p,eax			; p = array base, returned to the caller

	; ---- pass 2: copy the text in front of every delimiter ----
	mov esi,lpString
	.repeat
		.break .if edi == 0	; same empty-delimiter guard as in pass 1
		invoke strstr,esi,lpDelimiter
		.break .if eax == NULL
		mov ebx,eax		; ebx = delimiter position
		mov edx,eax
		sub edx,esi
		mov dwWordLen,edx	; bytes between previous and this delimiter
		inc edx			; room for the terminating zero
		invoke calloc,edx,sizeof sbyte; allocates memory initialised to 0
		mov edx,pArray
		mov [edx],eax
		add edx,sizeof LPCSTR
		mov pArray,edx
		invoke memcpy,eax,esi,dwWordLen
		lea esi,[ebx+edi]	; continue after the delimiter
	.until FALSE

	; ---- last element: whatever follows the final delimiter ----
	invoke strlen,esi
	mov ebx,eax
	inc eax
	invoke calloc,eax,sizeof sbyte; allocates memory initialised to 0
	mov edx,pArray
	mov [edx],eax
	add edx,sizeof LPCSTR
	mov pArray,edx
	invoke memcpy,eax,esi,ebx

	mov eax,p
	ret

StringSplit endp



Test code:
; Usage example for StringSplit: split a sample string, print the element
; count and each element in brackets, then release everything again.
; NOTE(review): this is a fragment of a procedure body; T and CRLF are
; presumably the poster's own macro/equate - confirm.
local pArray, Elems

; Elems receives the element count, eax the array of string pointers
invoke StringSplit,T("|#|Hello|#|World|#|Bye|#|World|#|"),T("|#|"),addr Elems
mov pArray,eax
invoke printf,T("%u Elements",CRLF,CRLF),Elems
mov esi,pArray
; Elems doubles as the loop counter, so it is zero once the loop is done
.while Elems
invoke printf,T('[%s]',CRLF),LPCSTR ptr [esi]
; each element is freed right after it has been printed
invoke free,[esi]
add esi,sizeof LPCSTR
dec Elems
.endw
; finally release the pointer array itself
invoke free,pArray

Output:
6 Elements

[]
[Hello]
[World]
[Bye]
[World]
[]