News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Split String Into Array Via Delimiter

Started by DeadlyVermilion, May 30, 2011, 06:48:17 PM

Previous topic - Next topic

DeadlyVermilion

Hello there,

Basically, I need to split a string by a certain delimiter that is 3 characters long. For example:

|#|Hello|#|World|#|Bye|#|World|#|

So when split.
Byte array one would contain 'Hello'
Byte array two would contain 'World'
Byte array three would contain 'Bye', and so on.

I have no problem doing this in Delphi but as I am working on a project in MASM I am hoping to have it in MASM.

Does anyone have an example of this anywhere or something?

Thanks In Advance :)

jj2007

; Splits TheSrc in place on the 3-byte delimiter "|#|" and prints every
; non-empty token on its own line.
;
; The first byte of each delimiter is overwritten with 0, so every token
; becomes a zero-terminated string inside TheSrc itself.  Pointers to the
; tokens are collected in TheArray, followed by a 0 dword as end marker.
; No bounds check is made on TheArray: 1000 bytes hold at most 249 token
; pointers plus the end marker.
include \masm32\include\masm32rt.inc

.data?
TheArray db 1000 dup(?)                 ; table of dword token pointers

.data
TheSrc db "|#|Hello|#|World|#|Bye|#|World|#|", 0

.code
start:  mov esi, offset TheSrc          ; esi -> string being split (modified in place)
        mov edi, offset TheArray        ; edi -> next free slot in the pointer table
        xor ecx, ecx                    ; ecx = scan offset into TheSrc
        mov eax, esi                    ; eax -> start of the token being scanned

        ; Stop at the terminator instead of at sizeof TheSrc: the 3-byte
        ; compare below then never reads beyond the string's own zero byte
        ; (the && short-circuits as soon as the second byte is not '#').
        .While byte ptr [esi+ecx]
            ; the word constant "#|" has '|' in its low byte, i.e. it
            ; matches the bytes '|','#' in memory
            .if word ptr [esi+ecx]=="#|" && byte ptr [esi+ecx+2]=="|"
                mov byte ptr [esi+ecx], 0       ; terminate the token that ends here
                .if byte ptr [eax]              ; skip empty tokens (leading or
                    stosd                       ; back-to-back delimiters)
                .endif
                add ecx, 3                      ; step over the whole delimiter
                lea eax, [esi+ecx]              ; next token starts right behind it
            .else
                inc ecx
            .endif
        .Endw

        .if byte ptr [eax]              ; text after the last delimiter is a token too
            stosd
        .endif
        mov dword ptr [edi], 0          ; explicit end marker for the print loop

        mov esi, offset TheArray
        .While 1
            lodsd                       ; eax = next token pointer
            .Break .if !eax             ; 0 = end of table
            print eax, 13, 10
        .Endw
        inkey "That was easy, right?"
        exit

end start

DeadlyVermilion

This looks confusing and I don't quite understand it. Is there a simpler way to do this? Hopefully in a procedure so I can use it multiple times throughout my program.

qWord

a bit more dynamically (quick'n dirty ;-)):
Quote:
include masm32rt.inc
.code
; strLenX - locate the first "|#|" delimiter in a zero-terminated string.
;
; In:   pStr = pointer to the string to scan
; Out:  delimiter found:     eax = offset of the delimiter's first byte
;                                  (= length of the text in front of it)
;                            edx = 1
;       no delimiter found:  eax = length of the string
;                            edx = 0
; Uses: ecx (match state), ebx is preserved
;
; ecx tracks how much of the delimiter has been matched so far:
;       0 = nothing, 1 = "|" seen, 3 = "|#" seen.
; edx remembers where the candidate delimiter started.
strLenX proc uses ebx pStr:ptr CHAR

   mov ebx,pStr
   xor eax,eax                           ; eax = current offset
   xor ecx,ecx                           ; ecx = match state
   .while CHAR ptr [ebx+eax]
      .if CHAR ptr [ebx+eax] == '|' && ecx == 3
         mov eax,edx                     ; full "|#|" matched: report its start
         mov edx,1
         ret
      .elseif CHAR ptr [ebx+eax] == '#' && ecx == 1
         mov ecx,3
      .elseif CHAR ptr [ebx+eax] == '|'
         ; A '|' always opens a new candidate, also when it interrupts a
         ; partial match (e.g. "||#|"); otherwise that delimiter is missed.
         mov edx,eax
         mov ecx,1
      .else
         xor ecx,ecx                     ; mismatch: start over
      .endif
      inc eax
   .endw
   xor edx,edx                           ; end of string reached without a match
   ret

strLenX endp

; parse - split a string on "|#|" into separately allocated copies.
;
; In:   psz = pointer to the zero-terminated source string (not modified)
; Out:  eax = pointer to a table of PCHAR, one per non-empty token, in
;             source order, closed by a NULL entry;
;             eax = 0 if the table itself could not be allocated.
;       If a token buffer cannot be allocated, splitting stops and the
;       table holds the tokens copied so far.
;
; Empty tokens (leading, trailing or back-to-back delimiters) are skipped.
; Text following the last delimiter is returned as a token as well.
; The table and every token are obtained through alloc(); this procedure
; never frees them.
parse proc uses ebx edi esi psz:PCHAR
LOCAL ppChar:ptr CHAR
LOCAL cchToken:DWORD
LOCAL fFound:DWORD

   ; ---- pass 1: count the non-empty tokens ----
   mov ebx,psz
   xor esi,esi
   .while 1
      invoke strLenX,ebx                 ; eax = token length, edx = delimiter found?
      .if eax
         inc esi
      .endif
      .break .if !edx                    ; that was the tail of the string
      lea ebx,[ebx+eax+3]                ; continue behind the delimiter
   .endw

   ; ---- pointer table: one slot per token + closing NULL ----
   mov esi,alloc(ADDR [esi*4+4])
   mov ppChar,esi
   .if !esi
      xor eax,eax
      ret
   .endif

   ; ---- pass 2: copy each token into its own buffer ----
   mov ebx,psz
   .while 1
      invoke strLenX,ebx
      mov fFound,edx                     ; alloc() below does not preserve edx
      .if eax
         mov cchToken,eax
         inc eax                         ; + 1 byte for the terminating zero
         mov edi,alloc(eax)
         .break .if !edi                 ; out of memory: keep what we have
         mov PCHAR ptr [esi],edi
         add esi,4
         mov ecx,cchToken
         .while ecx                      ; copy the token, advancing ebx past it
            movzx eax,CHAR ptr [ebx]
            mov CHAR ptr [edi],al
            inc ebx
            inc edi
            dec ecx
         .endw
         mov CHAR ptr [edi],0
      .endif
      .break .if fFound == 0             ; no delimiter behind this token: done
      add ebx,3                          ; skip the delimiter
   .endw

   mov dword ptr [esi],0                 ; close the table explicitly
   mov eax,ppChar
   ret
parse endp

; main - demo driver: splits the sample string with parse and prints each
; returned token as "<index>:   <token>", one per line, then waits for a
; key and exits.
main proc

   .data
       string db "|#|Hello|#|World|#|Bye|#|World|#|",0
   .code
   invoke parse,ADDR string
   mov esi,eax                           ; esi -> table of token pointers
   xor edi,edi                           ; edi = index of the current entry

   .while 1
      mov eax,PCHAR ptr [esi+edi*4]
      .break .if !eax                    ; a zero entry closes the table
      print str$(edi)
      print ":   "
      print PCHAR ptr [esi+edi*4],13,10
      inc edi
   .endw

   inkey
   exit

main endp
end main
FPU in a trice: SmplMath
It's that simple!

jj2007

Quote from: DeadlyVermilion on May 30, 2011, 07:47:58 PM
This looks confusing and I don't quite understand it.

See \masm32\help\opcodes.chm for lods and stos.

Quote:
Is there a simpler way to do this?

No. I showed you the easiest one.

MichaelW

If you want high-level language simplicity, try the CRT.

;==============================================================================
; Splits str1 in place on the delimiter STRING "|#|" with the CRT's strstr
; and prints every non-empty token on its own line.
;
; strtok is not used here: it treats its second argument as a SET of
; single-character delimiters, so a lone '|' or '#' inside a token would
; split it as well.
;==============================================================================
    include \masm32\include\masm32rt.inc
;==============================================================================
    .data
        str1  db "|#|Hello|#|World|#|Bye|#|World|#|",0
        delim db "|#|",0
        DELIM_LEN equ $ - delim - 1     ; delimiter length without the zero
    .code
;==============================================================================
start:
;==============================================================================
    mov ebx, OFFSET str1                ; ebx -> start of the current token
    .WHILE 1
        invoke crt_strstr, ebx, ADDR delim
        mov esi, eax                    ; esi -> next delimiter, or NULL
        .IF esi
            mov BYTE PTR [esi], 0       ; terminate the current token
        .ENDIF
        .IF BYTE PTR [ebx]              ; skip empty tokens, as strtok did
            print ebx,13,10
        .ENDIF
        .BREAK .IF !esi                 ; no further delimiter: that was the tail
        lea ebx, [esi+DELIM_LEN]        ; next token starts behind the delimiter
    .ENDW
    inkey "Press any key to exit..."
    exit
;==============================================================================
end start


The compiler-generated assembly code for strtok is ~120 lines, so for assembly code the versions that qWord and jj2007 supplied are relatively short.
eschew obfuscation

drizz

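strtok treats its second argument as a set of single-character delimiters, not as one delimiter string, so it would also split on a lone '|' or '#'. (strtok also requires a writable string, because it overwrites the delimiter characters with nulls.)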
strstr can be used for strings.

Here's a function that I might add to my library..


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Function: StringSplit
;;
;; Splits a string to an array of substrings according to the separator
;; parameter.
;;
;; Parameters:
;;
;; lpString - source string (not modified)
;; lpDelimiter - delimiter string
;; lpdwArrayElements - pointer to a dword variable that will receive array
;; length
;;
;; Returns:
;;
;; Array of strings.
;; Array and its elements are allocated with <calloc>.
;; Array length is stored in variable pointed by lpdwArrayElements
;; (number of delimiters found + 1; empty substrings are kept).
;; If the array cannot be allocated, NULL is returned and 0 is stored.
;; If a single element cannot be allocated, its array slot is NULL.
;;
;; Remarks:
;;
;; Array and its elements must be freed with "free" function
;; An empty delimiter never splits: the result is one element holding a
;; copy of the whole source string.
;;
StringSplit proc uses esi edi ebx lpString:LPCSTR, lpDelimiter:LPCSTR, lpdwArrayElements:DWORD

LOCAL p, pArray, dwWordLen

    invoke strlen,lpDelimiter
    mov edi,eax; edi = delimiter length

    ; pass 1: count the delimiters
    mov esi,lpString
    xor ebx,ebx;count
    .repeat
        ; strstr matches an empty needle at every position, which would
        ; never advance esi - treat an empty delimiter as "not found"
        .break .if edi == 0
        invoke strstr,esi,lpDelimiter
        .break .if eax == NULL
        inc ebx
        lea esi,[eax+edi]
    .until FALSE

    inc ebx; delims + 1 == elements
    invoke calloc,ebx,sizeof LPCSTR
    mov edx,lpdwArrayElements
    .if eax == NULL
        mov dword ptr [edx],0; no array, no elements
        ret
    .endif
    mov [edx],ebx
    mov pArray,eax; pArray = next slot to fill
    mov p,eax; p = array start (return value)

    ; pass 2: copy the text in front of each delimiter
    mov esi,lpString
    .repeat
        .break .if edi == 0
        invoke strstr,esi,lpDelimiter
        .break .if eax == NULL
        mov ebx,eax; ebx = delimiter position
        mov edx,eax
        sub edx,esi
        mov dwWordLen,edx
        inc edx
        invoke calloc,edx,sizeof sbyte; allocates memory initialised to 0
        mov edx,pArray
        mov [edx],eax
        add edx,sizeof LPCSTR
        mov pArray,edx
        .if eax != NULL
            invoke memcpy,eax,esi,dwWordLen
        .endif
        lea esi,[ebx+edi]
    .until FALSE

    ; last element: whatever follows the final delimiter
    invoke strlen,esi
    mov ebx,eax
    inc eax
    invoke calloc,eax,sizeof sbyte; allocates memory initialised to 0
    mov edx,pArray
    mov [edx],eax
    .if eax != NULL
        invoke memcpy,eax,esi,ebx
    .endif

    mov eax,p
    ret

StringSplit endp



Test code:
; Usage example for StringSplit (fragment; the enclosing proc is not shown).
local pArray, Elems

invoke StringSplit,T("|#|Hello|#|World|#|Bye|#|World|#|"),T("|#|"),addr Elems
mov pArray,eax; keep the array start for the final free
invoke printf,T("%u Elements",CRLF,CRLF),Elems
mov esi,pArray; esi walks the array entries
.while Elems; Elems doubles as the countdown and ends at 0
invoke printf,T('[%s]',CRLF),LPCSTR ptr [esi]
invoke free,[esi]; release this element once it has been printed
add esi,sizeof LPCSTR
dec Elems
.endw
invoke free,pArray; finally release the array itself

Output:
6 Elements

[]
[Hello]
[World]
[Bye]
[World]
[]

The truth cannot be learned ... it can only be recognized.