.data
; 64-entry lookup table: index 0..63 -> Base64 digit (used via xlat with ebx = table base)
tblBase64 db "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
.code
;-----------------------------------------------------------------------
; Base64Encode(lpSource, lpDestination, InputAmt)        [stdcall, ret 12]
; Encodes InputAmt bytes at lpSource into Base64 text at lpDestination,
; '='-padding the final group as required.  Writes no terminator and no
; line breaks.  Returns eax = number of output bytes (a multiple of 4
; for any nonzero input).
; Register roles: esi=src, edi=dst, edx=input bytes remaining,
;   ebx=xlat table, ah=leftover-bit accumulator, cl=shift count 6/4/2.
; Preserves ebx/esi/edi/ebp; clobbers ecx, edx, flags.
;-----------------------------------------------------------------------
Base64Encode: ;(lpSource, lpDestination, InputAmt), returns eax = output size
push ebp
mov ebp,esp
push esi
push edi
push ebx
mov ebx,offset tblBase64 ; ebx = xlat table base
mov esi,[ebp+8]          ; esi = source pointer
mov edi,[ebp+12]         ; edi = destination pointer
mov edx,[ebp+16]         ; edx = input bytes remaining
start_a_trio:
mov ah,0                 ; ah = bits carried over from the previous byte
mov ecx,6                ; cl = 6, then 4, then 2 across the trio
@@: dec edx
js short @F              ; input exhausted -> tail/padding handling
lodsb                    ; al = next input byte
shl ax,cl                ; ah = carried bits + top bits of al; al keeps the rest
xchg al,ah               ; al = finished 6-bit index; ah = leftover bits (at top)
xlat                     ; al = Base64 digit for that index
stosb
shr ah,cl                ; bring the leftover bits back down for next round
sub cl,2
jnz @B                   ; after 3 input bytes cl hits 0 ...
mov al,ah                ; ... and ah is exactly the final 6-bit group
xlat
stosb                    ; 4th output character of the trio
jmp short start_a_trio
@@: ;cl is 6/4/2 if input size is 0/1/2 mod 3
sub cl,4
js short two_mod_3       ; cl was 2: two leftover input bytes -> one '='
jnz short zero_mod_3     ; cl was 6: ended on a trio boundary, no padding
mov al,0                 ; cl was 4: one leftover byte; its low 2 bits sit in ah
shr eax,4                ; al = (ah & 0Fh) << 4  (junk above ax shifts toward ah, never into al)
xlat
stosb
mov ax,"=="              ; two pad characters
stosw
jmp short zero_mod_3
two_mod_3:
mov al,0                 ; ah holds the low 4 bits of the 2nd leftover byte
shr eax,6                ; al = (ah & 3Fh) << 2 -> final 6-bit group
xlat
mov ah,"="               ; one pad character; stosw writes digit then '='
stosw
zero_mod_3:
xchg eax,edi             ; eax = current destination pointer
sub eax,[ebp+12]         ; minus the start = bytes written
pop ebx
pop edi
pop esi
pop ebp
ret 12                   ; stdcall: pop the 3 dword arguments
:8)
Here's the decoding routine. In this illustration the xlat table called tblBase64_inverse is generated at run time, but it could instead be embedded in the data or code section.
.data
; Encoder table, repeated from the encoding snippet above so this part
; stands alone: index 0..63 -> Base64 digit.
tblBase64 db "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
; Inverse table, filled at run time by make_mybase_inv:
; tblBase64_inverse[c] = 6-bit value of digit c, or 0FFh (high bit set)
; for any byte that is not a Base64 digit.
tblBase64_inverse db 256 dup (?)
.code
;-----------------------------------------------------------------------
; make_mybase_inv -- build the 256-byte inverse table for decoding.
; After the call: tblBase64_inverse[c] = 6-bit value (0..63) when c is a
; Base64 digit, else 0FFh (high bit set = "not a digit" marker).
; Clobbers eax, ecx, edx, esi, edi, flags.
;-----------------------------------------------------------------------
make_mybase_inv:
mov edi,offset tblBase64_inverse
mov ecx,256/4
or eax,-1                ; fill pattern 0FFFFFFFFh marks every entry invalid
rep stosd
mov ecx,64
mov esi,offset tblBase64
mov edi,offset tblBase64_inverse
inc eax                  ; eax = 0, so lodsb yields a clean zero-extended index
cdq                      ; edx = 0.  FIX: the original 'cwd' cleared only dx,
                         ; leaving the high word of edx as caller garbage.
                         ; Only dl is stored below, so it happened to work,
                         ; but clear all of edx rather than rely on that.
@@:
lodsb                    ; eax = next Base64 digit character (high bytes stay 0)
mov [edi+eax],dl         ; inverse[char] = its 6-bit value
inc edx
loop @B
ret
;-----------------------------------------------------------------------
; Base64Decode@8(lpszSource, lpDestination)              [stdcall, ret 8]
; Decodes the NUL-terminated Base64 string at lpszSource into binary at
; lpDestination.  Bytes whose inverse-table entry has the high bit set
; ('=' padding, CR/LF, other non-digits) are skipped.  Returns
; eax = number of output bytes.
; Requires make_mybase_inv to have been called first so that
; tblBase64_inverse is populated.
; Preserves ebx/esi/edi/ebp; clobbers ecx, edx, flags.
;-----------------------------------------------------------------------
Base64Decode@8: ;(lpszSource, lpDestination) Returns eax=output size.
push ebp
mov ebp,esp
push esi
push edi
push ebx
mov esi,[ebp+8]
mov edi,[ebp+12]
mov ebx,offset tblBase64_inverse ; FIX: was 'offset mybase_inv', an undefined
                                 ; symbol -- the xlat table built by
                                 ; make_mybase_inv is tblBase64_inverse
start_quartet:
xor edx,edx ;edx accumulates 3 bytes (24 bits) of output
mov ecx,4   ;cl = 6-bit groups still needed for this quartet
@@: lodsb
test al,al
jz short @F              ; NUL terminator -> handle the partial tail
xlat                     ; al = 6-bit value, or 0FFh if not a Base64 digit
test al,al
js short @B              ; high bit set: skip '=', whitespace, etc.
shl edx,6
or dl,al                 ; append the 6 bits
loop @B
mov eax,edx              ; full quartet: emit 3 bytes, high byte first
shr eax,10h              ; byte 1 = bits 23-16
stosb
xchg eax,edx
xchg al,ah               ; al = bits 15-8, ah = bits 7-0
stosw
jmp start_quartet
@@: ;if cl=4/3/2/1 there are 0/?/1/2 output bytes in the last set.
;The case cl=3 should never occur. If it does we do not
;decode the last byte. Only 2,3,or 4 data bytes should
;appear in each coded quartet: xx==, xxx=, or xxxx.
xchg eax,edx             ; eax = accumulated bits
sub cl,2
ja short Base64Decode_finish ; cl was 4 (empty) or 3 (malformed): write nothing
jz short @F                  ; cl was 2: case xx==
ror eax,10 ;case xxx=, 18 bits of which only the top 16 matter
stosb                    ; first byte = bits 17-10
shr eax,20               ; pre-shift so the shared shr below totals 24
@@: shr eax,4 ;case xx==, 12 bits of which only the top 8 matter
stosb
Base64Decode_finish:
xchg eax,edi
sub eax,[ebp+12]         ; bytes written = edi - lpDestination
pop ebx
pop edi
pop esi
pop ebp
ret 8                    ; stdcall: pop the 2 dword arguments
If you care about speed, your main loop for encoding should look something like this:
; Fast encoder inner loop (follow-up post, NASM syntax).  Encodes 3 input
; bytes -> 4 output characters per iteration, fetching 4 bytes at a time.
; Register roles (set up by surrounding code not shown here):
;   ebp = source pointer, edi = destination (stosd),
;   esp = bytes remaining, abused as the loop counter (real esp in mm0),
;   base64_enc_table = the 64-entry digit table.
; NOTE(review): loads 4 bytes per 3 consumed, so the final iteration can
; read one byte past the input buffer -- confirm the buffer allows this.
; NOTE(review): no tail/padding handling is shown; presumably the code
; after the loop deals with input lengths that are not a multiple of 3.
cld
movd mm0, esp            ; stash the real stack pointer; esp becomes the counter
.next:
mov eax, [ebp]           ; load 4 source bytes (only 3 are consumed)
bswap eax                ; first input bit becomes bit 31
mov ebx, eax
mov edx, eax
mov ecx, eax
shr eax, 8               ; 4th 6-bit group -> bits 13-8
shr ebx, 14              ; 3rd group
shr edx, 20              ; 2nd group
shr ecx, 26              ; 1st group (already <= 63: no mask needed)
and eax, 111111b
and ebx, 111111b
and edx, 111111b
movzx eax, byte [base64_enc_table+eax]
movzx ebx, byte [base64_enc_table+ebx]
movzx edx, byte [base64_enc_table+edx]
movzx ecx, byte [base64_enc_table+ecx]
shl eax, 24              ; place chars in the dword: ecx->byte0 ... eax->byte3
shl ebx, 16
shl edx, 8
add eax, ebx             ; merge the four characters
add ecx, edx
add ebp, 3               ; consumed 3 input bytes
add eax, ecx
stosd                    ; write 4 output characters (little-endian = in order)
sub esp, 3               ; counter -= 3
ja .next                 ; unsigned: loop while bytes remained
movd esp, mm0            ; restore the real stack pointer
Oh, I just realized I still have esi available to use. You could then use esp for a dynamic (custom) location of the encoding table.
The algorithm I wrote is fairly common; you just need to do a better search.
Thanks cmpxchg. That does look appreciably faster, fetching memory 4 bytes at a time. Since the data that goes through Base64 tends to be pretty bulky, my routine is worth rewriting and when I get around to it I'll post it here.