Unicode line by line reading algorithm.

Started by hutch--, June 21, 2011, 08:18:10 AM

Previous topic - Next topic

hutch--

I wrote this in MASM for a project I am working on and ported it, with no modifications, to the new PB FASTPROC format. It is a sequential line reader: each call reads one line from a source memory address and writes the result to a user-defined memory buffer. It returns the offset at which the next line starts, or zero once the read reaches the source's zero terminator. My own initial use for this algorithm was to load a settings file from disk and then parse it line by line to get the text settings. It is primarily designed to be called from assembler code, but the example uses a macro that makes it easier to use from high-level code.


' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    MACRO getline(sptr,bptr,lptr)
    ! push lptr
    ! push bptr
    ! push sptr
    ! call uc_getline
    END MACRO

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

FUNCTION PBmain as LONG

    ' Demonstration driver: builds a CRLF separated UNICODE test string, then
    ' pulls it apart one line at a time with getline, showing each line in a
    ' message box until the reader reports zero (source terminator reached).

    LOCAL srcText as WSTRINGZ * 128     ' the multi-line source text
    LOCAL lineBuf as WSTRINGZ * 16      ' receives one line per call
    LOCAL pSrc as WSTRINGZ PTR          ' address of srcText
    LOCAL pLine as WSTRINGZ PTR         ' address of lineBuf
    LOCAL nextPos as DWORD              ' byte offset of the next line in srcText

    srcText = "one"+$CRLF+_
              "two"+$CRLF+_
              "three"+$CRLF+_
              "four"+$CRLF+_
              "five"+$CRLF+_
              ""+$CRLF+_         ' deliberate empty line (writes NULL string to output buffer)
              "six"+$CRLF+_
              "seven"+$CRLF+_
              "eight"+$CRLF+_
              "nine"+$CRLF+_
              "ten"

    pSrc = VarPtr(srcText)      ' source address handed to the reader
    pLine = VarPtr(lineBuf)     ' destination address handed to the reader
    nextPos = 0                 ' start reading at the beginning of the text

    DO
        getline(pSrc,pLine,nextPos)     ' read one line into lineBuf
        ! mov nextPos, eax              ' EAX holds the offset of the next line (0 = done)
        msgbox @pLine                   ' show the line just read
    LOOP WHILE nextPos <> 0             ' the last line is still displayed before leaving

End FUNCTION

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

FASTPROC uc_getline

  ' ------------------------------------------------------------------------------------
  ' Read one line from a zero terminated UNICODE (16 bit character) text, starting at
  ' the current location, and copy it to the destination buffer as a zero terminated
  ' string. The line ends at a CR (13); a following LF (10) is skipped when present.
  '
  ' Arguments (pushed last to first, removed by this procedure with RET 12)
  '   1. src  : address of the zero terminated source text
  '   2. dst  : address of the buffer that receives the line
  '   3. cloc : BYTE offset from src at which to start reading (0 for the first line)
  '
  ' Return value in EAX
  '   non zero : byte offset from src of the start of the next line, pass it back
  '              as "cloc" on the next call
  '   zero     : the source zero terminator was reached while reading this line,
  '              the buffer holds the final line
  '
  ' ESI is preserved. EAX and EDX are modified.
  ' There is no length argument, so the caller must supply a destination buffer
  ' large enough for the longest line plus its terminator.
  ' ------------------------------------------------------------------------------------

    PREFIX "! "

    push esi

    mov esi, [esp+4][4]         ; src  (stack offsets are +4 because of the PUSH above)
    mov edx, [esp+8][4]         ; dst
    add esi, [esp+12][4]        ; cloc, ESI = address of the first character to read
    sub esi, 2                  ; step both pointers back one character because
    sub edx, 2                  ; the loop advances them before each read

  lbl0:
    add esi, 2
    add edx, 2
    movzx eax, WORD PTR [esi]   ; fetch the next 16 bit character
    test eax, eax               ; test for terminator
    jz iszero
    cmp eax, 13                 ; test for leading CR
    je quit
    mov [edx], ax               ; write WORD to destination buffer
    jmp lbl0

  iszero:                       ; EAX is ZERO here and is the return value
    mov WORD PTR [edx], 0       ; write a 2 byte terminator, a DWORD store overruns an exactly filled buffer
    pop esi
    ret 12

  quit:
    mov WORD PTR [edx], 0       ; write a 2 byte terminator, a DWORD store overruns an exactly filled buffer
    add esi, 2                  ; step over the CR
    cmp WORD PTR [esi], 10      ; only consume an LF if one really follows,
    jne nolf                    ; so a CR directly before the zero terminator
    add esi, 2                  ; cannot push the offset past the end of the text

  nolf:
    mov eax, esi
    sub eax, [esp+4][4]         ; return the current location as an offset from src

    pop esi

    ret 12

    END PREFIX

END FASTPROC

' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php