News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Unicode text array algo with test piece.

Started by hutch--, May 14, 2011, 12:12:44 AM

Previous topic - Next topic

hutch--

I needed this algo for a project I am working on. It reads a Unicode plain text file and writes the array of pointers and the data to a single allocated block of memory, returning the line count and the written byte length. Its initial task was reading a text-based INI file on a line-by-line basis. It could be done one operation faster, but I doubt that it's a problem for its intended task.


; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

uc_text_array proc parr:DWORD,ucfname:DWORD

  ; ------------------------------------------------------------------------
  ; Load a UTF-16 (16 bit character) text file and split it into an array
  ; of zero terminated lines held in ONE allocated block of memory.
  ;
  ; ARGS.
  ; 1. parr    = address of empty pointer for allocated array to be returned
  ; 2. ucfname = UNICODE format file name to read lines from.
  ;
  ; ON SUCCESS
  ;   EAX returns the line count (number of text lines in returned array)
  ;   ECX returns the written length to memory of array in BYTES.
  ;   [parr] receives the address of the block. The block starts with the
  ;   DWORD line pointers (line count + 1 of them, the last one addresses
  ;   the end of the data) followed by the text itself.
  ;
  ; ON FAILURE (file cannot be opened, sized or read, or memory cannot be
  ; allocated)
  ;   EAX = 0, ECX = 0 and [parr] is set to NULL. Nothing is left allocated
  ;   and the file handle is closed.
  ;
  ; NOTES
  ;   Lines are split at the CR (000Dh) character, the CR and the 16 bit
  ;   character that follows it (normally LF) are overwritten with zeros.
  ;   No leading characters are skipped so a byte order mark at the start
  ;   of the file remains part of the first line.
  ;   A file with no characters in it returns one empty line.
  ;
  ; release the allocated memory at "parr" with GlobalFree() when finished.
  ; EBX ESI and EDI are preserved.
  ; ------------------------------------------------------------------------

    LOCAL hFile :DWORD              ; file handle
    LOCAL blen  :DWORD              ; file length in bytes
    LOCAL hmem  :DWORD              ; temporary buffer holding the raw file
    LOCAL pmem  :DWORD              ; the returned block (pointers + text)
    LOCAL lcnt  :DWORD              ; line count
    LOCAL bread :DWORD              ; bytes read, required by ReadFile
    LOCAL lnpt  :DWORD              ; byte size of "lcnt" pointers

    push ebx
    push esi
    push edi

    invoke CreateFileW,ucfname,GENERIC_READ,FILE_SHARE_READ,
                       NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL
    cmp eax, INVALID_HANDLE_VALUE
    je failed                       ; file could not be opened
    mov hFile, eax

    invoke GetFileSize,hFile,NULL
    cmp eax, 0FFFFFFFFh             ; INVALID_FILE_SIZE
    je close_failed
    mov blen, eax

  ; ---------------------------------------------------------------
  ; the 64 spare zero bytes guarantee a terminating zero WORD after
  ; the text and leave room to append a CRLF pair further down
  ; ---------------------------------------------------------------
    add eax, 64
    jc close_failed                 ; length + padding exceeds 32 bits
    invoke GlobalAlloc,GMEM_FIXED or GMEM_ZEROINIT,eax
    test eax, eax
    jz close_failed
    mov hmem, eax

    invoke ReadFile,hFile,hmem,blen,ADDR bread,NULL
    mov ebx, eax                    ; keep the result, EBX survives the next API call
    invoke CloseHandle,hFile
    test ebx, ebx
    jz free_failed                  ; the read failed

  ; ----------------------------------------
  ; count the lines in the unicode text file
  ; ----------------------------------------
    mov lcnt, 0
    mov ebx, hmem
    sub ebx, 2
  @@:
    add ebx, 2
    cmp WORD PTR [ebx], 0           ; zero WORD = end of the text
    je @F
    cmp WORD PTR [ebx], 000Dh       ; count the CR
    jne @B
    add lcnt, 1
    jmp @B
  @@:

  ; -----------------------------------------------------------
  ; EBX = address of the terminating zero WORD.
  ; with no characters at all there is no "last char" to test,
  ; reading [ebx-2] would be outside the allocated buffer.
  ; -----------------------------------------------------------
    cmp ebx, hmem
    je addcrlf

  ; --------------------------
  ; check if last char is a LF
  ; --------------------------
    cmp WORD PTR [ebx-2], 000Ah
    je pastit

  addcrlf:
  ; ---------------------------
  ; add a CRLF pair if it's not
  ; ---------------------------
    mov DWORD PTR [ebx], 000A000Dh  ; fits inside the 64 byte padding
    add lcnt, 1

  pastit:
  ; ----------------------------------------------
  ; calculate the space for both pointers and data
  ; ----------------------------------------------
    mov eax, lcnt
    lea eax, [eax*4]                ; one DWORD pointer per line
    mov lnpt, eax
    add eax, blen
    jc free_failed                  ; total size exceeds 32 bits
    add eax, 64
    jc free_failed

    invoke GlobalAlloc,GMEM_FIXED or GMEM_ZEROINIT,eax
    test eax, eax
    jz free_failed
    mov pmem, eax

  ; ---------------------------------------------
  ; copy the data to the space after the pointers
  ; the extra 4 bytes skipped in the destination
  ; are the slot for pointer number lcnt + 1
  ; ---------------------------------------------
    mov esi, hmem
    mov edi, pmem
    add edi, lnpt
    add edi, 4
    mov ecx, blen
    add ecx, 4                      ; include a CRLF pair appended above
    rep movsb
    invoke GlobalFree,hmem          ; free the file memory

    mov esi, pmem                   ; pointer array
    mov edi, pmem
    add edi, lnpt                   ; array data storage
    add edi, 4

    mov [esi], edi                  ; write the 1st data address into the first pointer in ESI
    add esi, 4

  ; ------------------------------------------------------
  ; walk the copied text, ESI = next pointer slot to fill,
  ; EDI = current 16 bit character
  ; ------------------------------------------------------
  @@:
    add edi, 2
  backin:
    cmp WORD PTR [edi], 0000h       ; exit on end of file
    je done
    cmp WORD PTR [edi], 000Dh       ; check for unicode CR
    jne @B
    mov DWORD PTR [edi], 0          ; terminate each line with a DWORD sized zero.
    add edi, 4                      ; step over the zeroed CR + LF
    mov [esi], edi                  ; write the next line address to the next pointer in ESI
    add esi, 4
    jmp backin

  done:
    mov ecx, edi
    sub ecx, pmem                   ; calculate and return the written length in ECX

    mov esi, parr                   ; load the empty array handle into ESI
    mov edi, pmem                   ; load the allocated array into EDI
    mov [esi], edi                  ; write the array address to the empty handle

    mov eax, lcnt                   ; return the line count

  exitproc:
    pop edi
    pop esi
    pop ebx

    ret

  ; -----------------------------------------------------------------
  ; failure exits, release what is still held then return zero in EAX
  ; and ECX with a NULL written to the caller's pointer
  ; -----------------------------------------------------------------
  free_failed:
    invoke GlobalFree,hmem          ; file handle is already closed here
    jmp failed

  close_failed:
    invoke CloseHandle,hFile

  failed:
    mov eax, parr
    mov DWORD PTR [eax], 0          ; caller gets a NULL array
    xor eax, eax                    ; line count  = 0
    xor ecx, ecx                    ; byte length = 0
    jmp exitproc

uc_text_array endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php