Here's a start on it:
; UNI_to_UTF8 (dest,src,charcount) - convert UTF-16 text to UTF-8.
;   Arguments are passed on the stack and removed by this routine (ret 12):
;     [ebp+8]  dest      -> output buffer for the UTF-8 bytes
;     [ebp+12] src       -> input made of 16-bit units
;     [ebp+16] charcount =  number of 16-bit units to convert, or -1 when
;                           the input is nul-terminated (the terminator
;                           itself is not converted)
;   Returns eax = number of bytes written to dest.
;   ebx, esi, edi and ebp are preserved; ecx and the flags are not.
;   The string instructions used here require the direction flag to be clear.
;   No terminator is written and dest is not bounds-checked; the output
;   never needs more than 3 bytes per input unit.
;   A high surrogate (D800h-DBFFh) immediately followed by a low surrogate
;   (DC00h-DFFFh) is combined into one code point and written as 4 bytes.
;   A surrogate without its partner is written as a 3-byte sequence.
UNI_to_UTF8: ; (dest,src,charcount) returns the output size in eax
    push ebp
    mov ebp,esp
    push ebx
    push esi
    push edi
    mov esi,[ebp+12] ;source
    mov ebx,[ebp+16] ;count of 16-bit units
    cmp ebx,-1
    jne short @F
    ;charcount=-1: measure the input up to, not including, the nul
    mov ecx,ebx ;-1, i.e. no limit on the scan
    mov edi,esi ;source
    xor eax,eax
    repne scasw ;leaves ecx = -(length+2)
    dec ebx ;to -2
    sub ebx,ecx ;ebx is now the input size in 16-bit units
@@: mov edi,[ebp+8] ;destination
UU_loop:
    dec ebx ;ebx = units still unread after this one
    js UU_done
    xor eax,eax
    lodsw
    test ah,11111000b ;any bit at or above 0800h?
    jnz short UU_3
    test ax,0000011110000000b ;any bit in 0080h-07FFh?
    jnz short UU_2
    stosb ;0000h-007Fh is stored unchanged
    jmp short UU_loop
UU_2: ;110xxxxx 10xxxxxx
    shl eax,2 ;eax has 8 to 11 bits of info
    shr al,2
    xchg al,ah ;the six low bits go in the latter output byte
    or ax,1000000011000000b
    stosw
    jmp short UU_loop
UU_3:
    lea ecx,[eax-0D800h] ;ecx = ten payload bits if this is a high surrogate
    cmp ecx,03FFh
    ja short UU_3_bmp ;not in D800h-DBFFh
    test ebx,ebx
    jz short UU_3_bmp ;nothing follows, so it cannot be a pair
    movzx eax,word ptr [esi] ;peek at the next unit without consuming it
    sub eax,0DC00h
    cmp eax,03FFh
    jbe short UU_4 ;DC00h-DFFFh: a valid pair
    lea eax,[ecx+0D800h] ;unpaired: restore the original unit
UU_3_bmp: ;1110xxxx 10xxxxxx 10xxxxxx
    mov ch,al ;low six bits, third output byte
    shr eax,6
    mov cl,al ;middle six bits, second output byte
    shr eax,6 ;al = top four bits
    and cx,0011111100111111b
    or cx,1000000010000000b
    or al,11100000b
    stosb
    xchg eax,ecx
    stosw ;cl first, then ch
    jmp UU_loop
UU_4: ;11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    add esi,2 ;consume the low surrogate
    dec ebx
    shl ecx,10
    lea eax,[eax+ecx+10000h] ;code point 10000h-10FFFFh
    mov ecx,eax ;keep a copy while al is used for output
    shr eax,18
    or al,11110000b
    stosb
    mov eax,ecx
    shr eax,12
    and al,00111111b
    or al,10000000b
    stosb
    mov eax,ecx
    shr eax,6
    and al,00111111b
    or al,10000000b
    stosb
    mov eax,ecx
    and al,00111111b
    or al,10000000b
    stosb
    jmp UU_loop
UU_done:
    xchg eax,edi
    sub eax,[ebp+8] ;bytes written = end pointer - dest
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret 12
That's the easy part. The reverse, expanding UTF-8 to Unicode, will require quite a few tests for invalid input.
I tested the above with some Russian text (400h-4FFh in Unicode) and Hindi text (900h-9FFh), and so far so good.
Same thing but with dword input characters from the 31-bit Universal Character Set (UCS-4):
USC_to_UTF8 proc uses ebx esi edi dest:ptr,src:ptr,charcount:dword
;Converts 31-bit UCS-4 characters (one per dword) to UTF-8, using the
;original 1- to 6-byte form of the encoding.
; dest      -> output buffer; not bounds-checked, no terminator is written,
;              and up to 6 bytes are produced per input dword
; src       -> input dwords
; charcount =  number of dwords, or -1 if the input is nul-terminated
;              (the terminating dword is not converted)
;returns the output size in eax, or
; eax=-1 if the input includes an invalid dword >= 80000000h
;              (bytes converted before the bad dword stay in dest)
;Values are encoded purely by magnitude: surrogates and values above
;10FFFFh are not rejected.
;Clobbers ecx, edx and flags; calls fan_eax_to_edx_3.
;The string instructions require the direction flag to be clear.
    mov esi,src
    mov ebx,charcount
    cmp ebx,-1
    jne short @F
    ;charcount=-1: measure the input up to, not including, the zero dword
    mov ecx,ebx ;-1
    mov edi,esi ;source
    xor eax,eax
    repne scasd ;leaves ecx = -(length+2)
    dec ebx ;to -2
    sub ebx,ecx ;ebx is now the input size in dwords
@@: mov edi,dest
UCS_loop:
    dec ebx
    js UCS_done
    lodsd
    bsr ecx,eax ;ecx = index of the highest set bit
    jz short UCS_1 ;eax=0 leaves ecx undefined: store a single 00h byte
    cmp cl,6
    jbe short UCS_1
    cmp cl,10
    jbe short UCS_2
    cmp cl,15
    jbe short UCS_3
    cmp cl,20
    jbe short UCS_4
    cmp cl,25
    jbe short UCS_5
    cmp cl,30
    jbe short UCS_6
    or eax,-1 ;bit 31 set: not a valid 31-bit character
    jmp UCS_ret
UCS_6: ;output will be:
; 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    call fan_eax_to_edx_3
    ;18 bits done, 13 remaining
    shl eax,2
    shr al,2
    or al,10000000b
    mov dl,al ;fourth-from-last byte joins the three in edx
    ;7 bits remaining
    shr eax,6
    shr al,2 ;drop the two marker bits shifted down from the old al
    or ax,1111110010000000b
    xchg al,ah ;lead byte must be stored first
    stosw
    xchg eax,edx
    stosd
    jmp UCS_loop
UCS_1: ;0xxxxxxx
    stosb
    jmp UCS_loop
UCS_2: ;110xxxxx 10xxxxxx
    shl eax,2
    shr al,2
    xchg al,ah ;the six low bits go in the latter output byte
    or ax,1000000011000000b
    stosw
    jmp UCS_loop
UCS_3: ;1110xxxx 10xxxxxx 10xxxxxx
    mov ch,al
    shr eax,6
    mov cl,al
    shr eax,6
    and cx,0011111100111111b
    or cx,1000000010000000b
    or al,11100000b
    stosb
    xchg eax,ecx
    stosw
    jmp UCS_loop
UCS_4: ;11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    call fan_eax_to_edx_3
    mov dl,al ;the three bits left over form the lead byte
    and dl,00000111b
    or dl,11110000b
    xchg eax,edx
    stosd
    jmp UCS_loop
UCS_5: ;111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    call fan_eax_to_edx_3
    ;18 bits done, 8 remaining
    shl eax,2
    shr al,2
    or ax,1111100010000000b
    mov dl,al ;second byte joins the three in edx
    mov al,ah ;lead byte
    stosb
    xchg eax,edx
    stosd
    jmp UCS_loop
UCS_done:
    xchg eax,edi
    sub eax,dest ;bytes written = end pointer - dest
UCS_ret:
    ret
USC_to_UTF8 endp
; fan_eax_to_edx_3 - peel the three lowest 6-bit groups off eax and turn
; each one into a UTF-8 continuation byte (10xxxxxx).
;   In:  eax = value being encoded
;   Out: eax = input shifted right by 18 bits
;        edx = bytes for bits 0-5, 6-11 and 12-17 in its top, second and
;              third byte respectively; dl = 0, left for the caller to fill
;        ecx = 0
;   Flags are clobbered.
fan_eax_to_edx_3:
    mov ecx,3 ;three groups to convert
fan_next_group:
    mov dl,al
    and dl,3Fh ;keep the six payload bits
    or dl,80h ;add the continuation marker
    shl edx,8 ;make room for the next byte
    shr eax,6 ;expose the next group
    dec ecx
    jnz short fan_next_group
    ret