News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

UTF8 Conversion

Started by UlliN, January 12, 2007, 06:27:22 PM

Previous topic - Next topic

UlliN

Hi,

Here are my routines for converting UTF-8 encoded strings to ANSI/OEM and vice versa. I wrote my own process-heap-based memory allocation routines because I ran into problems using alloc/free. Feel free to modify the code as you like.

.386
.model flat, stdcall
option casemap :none
;************************************************************
;  20060810      U.Neudecker   
;************************************************************

  include \masm32\include\masm32.inc
  include \masm32\include\windows.inc
  include \masm32\include\kernel32.inc
  includelib \masm32\lib\kernel32.lib
  include \masm32\macros\macros.asm

;  CharTypes:
;    CP_ACP       ANSI codepage   = 0
;    CP_OEMCP     OEM (original equipment manufacturer) codepage = 1

CP_UTF8 equ 65001

.code
COMMENT * #######################################################################
    Converts a String (ANSI/OEM) to UTF8
    The buffer pointed to by pUTF8String (lenUTF8String bytes) must be large enough to hold
    the UTF8 string; otherwise the conversion fails.
    If the function succeeds, the return value is the number of bytes written to the buffer
    pointed to by pUTF8String. The number includes the byte for the null terminator
    if the input length covers it.
    If the function fails, the return value is the negated (!) GetLastError code.
####################################################################### COMMENT *

;-----------------------------------------------------------------------
; CharToUTF8 - convert an ANSI/OEM string to UTF-8.
;
; The conversion goes through a temporary UTF-16 buffer obtained from
; ealloc and released with efree before returning.
;
; In:   pCharString    source string (code page given by CharType)
;       lenCharString  source length in bytes as passed on to
;                      MultiByteToWideChar (-1 = null-terminated)
;       CharType       source code page (CP_ACP or CP_OEMCP)
;       pUTF8String    destination buffer
;       lenUTF8String  size of the destination buffer in bytes
; Out:  eax > 0  number of bytes written to pUTF8String
;       eax < 0  negated GetLastError code, or -ERROR_NOT_ENOUGH_MEMORY
;                when the temporary buffer could not be allocated
; Regs: ecx is preserved (uses); edx and flags are not.
;-----------------------------------------------------------------------
CharToUTF8 proc stdcall uses ecx,
                pCharString:dword,
                lenCharString:dword,
                CharType:dword,
                pUTF8String:dword,
                lenUTF8String:dword

LOCAL pUNICODE:dword
LOCAL lenUNICODE:DWORD

    ; Pass 1: query the number of UTF-16 code units required. Nothing is
    ; written because cchWideChar is 0, so no buffer is needed yet.
    invoke MultiByteToWideChar, CharType, MB_PRECOMPOSED, pCharString, lenCharString, NULL, 0
    .if eax == 0
        invoke GetLastError
        neg eax
        ret                             ; nothing allocated yet
    .endif
    mov lenUNICODE, eax                 ; keep the count in memory: eax/ecx/edx
                                        ; do not survive the calls below

    shl eax, 1                          ; code units -> bytes
    invoke ealloc, eax
    .if eax == 0                        ; NOTE(review): assumes ealloc returns 0 on failure - confirm
        mov eax, -ERROR_NOT_ENOUGH_MEMORY
        ret
    .endif
    mov pUNICODE, eax

    ; Pass 2: ANSI/OEM -> UTF-16 into the temporary buffer.
    invoke MultiByteToWideChar, CharType, MB_PRECOMPOSED, pCharString, lenCharString, pUNICODE, lenUNICODE
    .if eax != 0
        ; Convert exactly the code units that were produced; for DBCS code
        ; pages this can be fewer than lenCharString.
        mov lenUNICODE, eax
        invoke WideCharToMultiByte, CP_UTF8, 0, pUNICODE, lenUNICODE, pUTF8String, lenUTF8String, NULL, NULL
    .endif
    .if eax == 0                        ; either conversion failed
        invoke GetLastError
        neg eax
    .endif

    push eax                            ; efree may clobber eax - protect the result
    invoke efree, pUNICODE
    pop eax
    ret
CharToUTF8 endp

;#######################################################################

;-----------------------------------------------------------------------
; UTF8ToChar - convert a UTF-8 string to ANSI/OEM.
;
; The conversion goes through a temporary UTF-16 buffer obtained from
; ealloc and released with efree before returning.
;
; In:   pUTF8String    source string (UTF-8)
;       lenUTF8String  source length in bytes as passed on to
;                      MultiByteToWideChar (-1 = null-terminated)
;       pCharString    destination buffer
;       lenCharString  size of the destination buffer in bytes
;       CharType       destination code page (CP_ACP or CP_OEMCP)
; Out:  eax > 0  number of bytes written to pCharString
;       eax < 0  negated GetLastError code, or -ERROR_NOT_ENOUGH_MEMORY
;                when the temporary buffer could not be allocated
; Regs: ecx is preserved (uses); edx and flags are not.
;-----------------------------------------------------------------------
UTF8ToChar proc stdcall uses ecx,
          pUTF8String:dword,
          lenUTF8String:dword,
          pCharString:dword,
          lenCharString:dword,
          CharType:dword

LOCAL pUNICODE:dword
LOCAL lenUNICODE:DWORD

    ; Pass 1: query the number of UTF-16 code units required. The buffer
    ; pointer is NULL because nothing is written when cchWideChar is 0.
    invoke MultiByteToWideChar, CP_UTF8, 0, pUTF8String, lenUTF8String, NULL, 0
    .if eax == 0
        invoke GetLastError
        neg eax
        ret                             ; nothing allocated yet
    .endif
    mov lenUNICODE, eax

    shl eax, 1                          ; code units -> bytes
    invoke ealloc, eax
    .if eax == 0                        ; NOTE(review): assumes ealloc returns 0 on failure - confirm
        mov eax, -ERROR_NOT_ENOUGH_MEMORY
        ret
    .endif
    mov pUNICODE, eax

    ; Pass 2: UTF-8 -> UTF-16 into the temporary buffer.
    invoke MultiByteToWideChar, CP_UTF8, 0, pUTF8String, lenUTF8String, pUNICODE, lenUNICODE
    .if eax != 0
        mov lenUNICODE, eax             ; code units actually produced
        invoke WideCharToMultiByte, CharType, 0, pUNICODE, lenUNICODE, pCharString, lenCharString, NULL, NULL
    .endif
    .if eax == 0                        ; either conversion failed
        invoke GetLastError
        neg eax
    .endif

    push eax                            ; efree may clobber eax - protect the result
    invoke efree, pUNICODE
    pop eax
    ret

UTF8ToChar endp

;#######################################################################

END



Ulli