Float to string without fpu

drizz · September 15, 2008, 02:32:32 AM

I've always wanted to write this kind of float conversion function, but figuring out how to deal with floating point binary was a 'pita'. Finally, code that works (and actually, float string formatting was just as painful).

This is a preliminary version open for suggestions and optimizations :) and bug reports :)

Code Select


; REAL8 bit layout, high dword on the left, low dword on the right.
; S = sign bit, E = exponent bits, F = fraction bits.
; SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
; 10000000000000000000000000000000 00000000000000000000000000000000 ; sign
; 01111111111100000000000000000000 00000000000000000000000000000000 ; exponent
; 00000000000011111111111111111111 11111111111111111111111111111111 ; fraction
;            100000000000000000000 00000000000000000000000000000000 ; implied

; No assembler-generated prologue/epilogue: the procedure below builds and
; tears down its own stack frame.
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE

; Exponent bias of a REAL8 (subtracted from the stored exponent).
R8_BIAS equ 1023
; Number of explicitly stored fraction bits in a REAL8.
R8_MANT equ 52

;-----------------------------------------------------------------------
; R8ToStr - convert a REAL8 to a zero-terminated decimal string using
;           integer instructions only (no FPU).
;
; In:    r8       = value to convert (passed by value, 8 bytes on stack)
;        lpBuffer = destination buffer (size is not checked here)
; Out:   eax      = length of the string, terminator not counted
; Regs:  ebp/esi/edi/ebx are saved and restored; eax/ecx/edx are used as
;        scratch. The callee removes the 12 argument bytes (ret 3*4).
;
; Output forms:
;   "0"                      for +0 and -0
;   [-]digits[.digits]       when the decimal exponent of the digit
;                            string is 0, or lies in -15..-1
;   [-]d[.ddd]e+N / e-N      otherwise (1 to 3 exponent digits)
;   [-]1.#INF, [-]1.#QNAN, [-]1.#SNAN, -1.#IND for the special encodings
; At most 15 significant digits are produced; trailing zeros are dropped.
;-----------------------------------------------------------------------
R8ToStr proc r8:REAL8, lpBuffer:PTR

	; Manual stack frame: save callee-saved registers, then reserve locals.
	push ebp
	push esi
	push edi
	push ebx
	locals = 32+3*4
	; tmpbuff: 32 bytes, decimal digits stored least significant first
	tmpbuff equ <[esp]>
	; iTrail: number of trailing zero digits stripped from the number
	iTrail equ <dword ptr [esp+32]>
	; iExp: binary exponent during scaling, decimal exponent afterwards
	iExp equ <dword ptr [esp+32+4]>
	; nSign: 80000000h when the input is negative, else 0
	nSign equ <dword ptr [esp+32+4+4]>
	add esp,-locals

	; Arguments sit above locals, 4 saved registers and the return address.
	mov ecx,10000000000000000000000000000000b
	mov ebx,[esp+2*4][4*4][locals]; high dword of r8
	mov esi,[esp+1*4][4*4][locals]; low dword of r8
	and ecx,ebx; sign bit
	and ebx,not 10000000000000000000000000000000b
	mov nSign,ecx
	mov edi,ebx
	shr ebx,20; 01111111111100000000000000000000
	and edi,00000000000011111111111111111111b
	cmp ebx,11111111111b
	je @@_NaN_Infinity; exponent all ones
	mov eax,esi
	or eax,edi
	or eax,ebx
	jz @@Zero; exponent and fraction all zero

	; Normal numbers have an implied leading 1 above the stored fraction.
	; Subnormals (stored exponent 0, fraction != 0) have no implied bit and
	; are scaled like stored exponent 1, so they are handled by adjusting
	; both here instead of being converted as if they were normal.
	mov eax,00000000000100000000000000000000b; implied bit
	test ebx,ebx
	jnz @F
	xor eax,eax; subnormal: no implied bit
	mov ebx,1; subnormal: same scale as stored exponent 1
@@:
	sub ebx,R8_BIAS; exponent
	or  edi,eax; high 20 bits + implied bit (if any)
	xor ebp,ebp; ebp = decimal exponent accumulated while scaling
	mov iExp,ebx
	;; 52bits in edi::esi
	;; value = (edi::esi) * 2^(ebx-R8_MANT) * 10^ebp
	.if sdword ptr ebx > 63
		; Too large for 64 bits: left-align the mantissa (bit 63 set),
		; then repeatedly divide by 10 and re-normalise until the
		; binary exponent fits.
		shld edi,esi,63-R8_MANT
		shl esi,63-R8_MANT
		.repeat
			call __div10
			; mod 10
			lea ecx,[eax*4+eax]
			add ecx,ecx
			sub esi,ecx; esi = remainder of the division

			; shift the quotient left until bit 63 is set again
			bsr ecx,edx
			neg ecx
			add ecx,31
			shld edx,eax,cl
			shl eax,cl

			mov ebx,iExp; __div10 clobbers ebx
			sub ebx,ecx
			
			or esi,eax; merge remainder into the vacated low bits
			mov edi,edx
			
			mov iExp,ebx
			add ebp,1
		.until sdword ptr ebx <= 63
		; drop the remaining 63-ebx fractional bit positions
		mov ecx,63
		sub ecx,ebx
		shrd esi,edi,cl
		shr edi,cl
	.else
		; Fractional bits present: multiply by 5 (and count a factor 2
		; in ebx and a factor 1/10 in ebp) until no fractional bits
		; remain, i.e. until ebx >= R8_MANT.
		.while sdword ptr ebx < R8_MANT
			.while ! (edi & 0F0000000h)
				; edi::esi *= 5
				mov eax,esi
				mov edx,edi
				shld edi,esi,2
				shl esi,2
				add esi,eax
				adc edi,edx
				add ebx,1
				dec ebp
				; Stop as soon as the value is an integer. Continuing
				; until the top nibble is set would, for binary
				; exponents 50 and 51, leave ebx so far above R8_MANT
				; that the final left shift below overflows 64 bits.
				.break .if sdword ptr ebx >= R8_MANT
			.endw
			; keep the top bit at or below bit 27 of edi so the next
			; multiply by 5 cannot overflow; ecx = max(bsr-27, 0)
			bsr ecx,edi
			sub ecx,31-4
			sbb edx,edx
			not edx
			and ecx,edx
			shrd esi,edi,cl
			shr edi,cl
			add ebx,ecx
		.endw
		; undo any excess right shift (ebx-R8_MANT is 0..4 here)
		lea ecx,[ebx-R8_MANT]
		shld edi,esi,cl
		shl esi,cl
	.endif
	; debug aid (disabled): print sign, integer and decimal exponent
;	mov edx,nSign
;	pushad
;	shr edx,1
;	sbb edx,edx
;	and edx,'-'-'+'
;	add edx,'+'
;	invoke printf,T("%c%I64u.0e%i",13,10),edx,edi::esi,ebp
;	popad
	; job done, now just the hard part - formating
	
	;; adjust number to 16 digits 2386F26FC0FFFFh
	.while edi >= 2386F2h; LOW = 6FC0FFFF
		.break .if edi == 2386F2h && esi < 6FC0FFFFh
		call __div10
		mov esi,eax
		mov edi,edx
		add ebp,1;; increase exponent
	.endw

	;; round it if needed (if 16 digit) 38D7EA4C67FFFh
	.if edi>=38D7Eh;A4C67FFF
		.if ! (edi == 38D7Eh && esi < 0A4C67FFFh)
			add esi,5
			adc edi,0
			call __div10
			add ebp,1; increase exponent
			mov esi,eax
			mov edi,edx
		.endif		
	.endif
	
	mov iExp,ebp
	;; trailing zero count
	;; divide by 10 while the remainder is 0; ebp counts the zeros
	xor ebp,ebp
	jmp @F
	.repeat
		mov esi,eax
		mov edi,edx
		add ebp,1
	@@:	call __div10
		lea ecx,[eax*4+eax]
		neg ecx
		add ecx,ecx
		add ecx,esi; ecx = esi - 10*quotient = last digit
	.until !zero?
	mov iTrail,ebp
	;; emit the digits into tmpbuff, least significant first;
	;; the quotient from the loop above is still in edx::eax
	xor ebp,ebp
	jmp @F
	.repeat
		call __div10
	@@:	lea ecx,[eax*4+eax]
		neg ecx
		lea ecx,[ecx*2+esi+'0']
		mov tmpbuff[ebp],cl
		add ebp,1
		mov esi,eax
		mov edi,edx
		or eax,edx
	.until zero?

	;; write '-' if negative: ecx becomes -1 (negative) or 0
	mov ecx,nSign
	mov esi,[esp+3*4][4*4][locals]; lpBuffer
	add ecx,ecx
	mov edx,'-'
	mov edi,iExp; exp
	sbb ecx,ecx
	and edx,ecx
	mov [esi],dl
	sub esi,ecx; advance past the sign only if one was written

	add edi,iTrail; decimal exponent of the stripped digit string
	xchg esi,ebp; esi = digit count, ebp = output pointer (flags kept)
	.if zero?;; exponent is 0
		.repeat
			mov al,tmpbuff[esi-1]
			mov [ebp],al
			add ebp,1
			sub esi,1
		.until zero?
		
	.elseif (sdword ptr edi >=-15 && sdword ptr edi < 0)
		;; check for format without exp
		add edi,esi; edi = number of digits before the decimal point
		.if sdword ptr edi <= 0
			; "0." followed by -edi zeros and then all digits
			mov [ebp],word ptr '.0'
			add ebp,2
			.while sdword ptr edi < 0
				mov [ebp],byte ptr '0'
				add ebp,1
				add edi,1
			.endw
			.repeat
				mov al,tmpbuff[esi-1]
				mov [ebp],al
				add ebp,1
				sub esi,1
			.until zero?
		.else
			; digits with '.' inserted after the first edi of them
			.repeat
				mov al,tmpbuff[esi-1]
				mov [ebp],al
				add ebp,1
				sub edi,1
				.if zero?
					mov [ebp],byte ptr '.'
					add ebp,1
				.endif
				sub esi,1
			.until zero?
		.endif
	.else
		; scientific: first digit, optional ".rest", then the exponent
		mov al,tmpbuff[esi-1]
		mov [ebp],al
		add ebp,1
		sub esi,1
		jz @F

		mov [ebp],byte ptr '.'
		add ebp,1
		.repeat
			mov al,tmpbuff[esi-1]
			mov [ebp],al
			add ebp,1
			add edi,1; each digit moved behind the point raises the exponent
			sub esi,1
		.until zero?
@@:
		mov [ebp],byte ptr 'e'
		add ebp,1

		mov eax,edi
		
		; exponent sign: edx = -1 if negative, giving '-', else '+'
		cdq
		and edx,'-'-'+'
		add edx,'+'
		mov [ebp],dl
		add ebp,1
		
		; abs
		cdq
		xor eax,edx
		sub eax,edx
		; split |exponent| into up to 3 digits; n/10 is computed as
		; (n*0CCCCCCCDh) >> 35
		mov edi,0CCCCCCCDh; magic
		mov ecx,eax
		mul edi
		shr edx,3
		lea ebx,[edx*4+edx]
		neg ebx
		lea ebx,[ebx*2+ecx+'0']; bl = units digit
		mov eax,edx
		.if edx
			mov ecx,eax
			mul edi
			shr edx,3
			lea esi,[edx*4+edx]
			neg esi
			lea esi,[esi*2+ecx+'0']; tens digit
			mov eax,edx
			.if edx
				mov ecx,eax
				mul edi
				shr edx,3
				lea eax,[edx*4+edx]
				neg eax
				lea eax,[eax*2+ecx+'0']; hundreds digit
				mov [ebp],al
				add ebp,1
			.endif
			mov eax,esi
			mov [ebp],al
			add ebp,1
		.endif
		mov [ebp],bl
		add ebp,1

	.endif

	; common exit: ebp points one past the last character written
@@Done:	
	mov byte ptr [ebp],0
	mov eax,ebp
	sub eax,[esp+3*4][4*4][locals]; length = end - lpBuffer
	
	add esp,locals
	pop ebx
	pop edi
	pop esi
	pop ebp
	ret 3*4
	
	; Exponent all ones. Here edi = high 20 fraction bits, esi = low 32.
@@_NaN_Infinity:
	mov ecx,nSign
	mov ebp,[esp+3*4][4*4][locals]
	add ecx,ecx
	mov edx,'-'
	sbb ecx,ecx; ecx = -1 if negative, else 0 (still valid below)
	and edx,ecx
	mov [ebp],dl
	sub ebp,ecx
	mov dword ptr [ebp],'#.1'
	mov eax,edi
	or eax,esi
	.if !eax
		; fraction is zero: infinity
		mov eax,'FNI'
		mov [ebp+3],eax	
		add ebp,6
	.elseif ecx != 0 && edi == 10000000000000000000b && esi == 0
		; negative, only the quiet bit set: the "indefinite" NaN.
		; This has to be tested before the generic quiet-NaN case,
		; otherwise it can never be selected.
		mov eax,'DNI'
		mov [ebp+3],eax	
		add ebp,6
	.elseif edi & 10000000000000000000b
		; top fraction bit set: quiet NaN
		mov eax,'NANQ'
		mov [ebp+3],eax	
		add ebp,7
	.else
		; top fraction bit clear: signaling NaN
		mov eax,'NANS'
		mov [ebp+3],eax	
		add ebp,7
	.endif
	jmp @@Done

	; +0 and -0 both produce "0" (no sign is written)
@@Zero:
	mov ebp,[esp+3*4][4*4][locals]
	mov byte ptr [ebp],'0'	
	add ebp,1
	jmp @@Done

	;; div <edi::esi> by 10
	;; ret <edx::eax> 
	;; Multiplies by 0CCCCCCCCCCCCCCCDh, keeps the high 64 bits of the
	;; 128-bit product and shifts them right by 3.
	;; Clobbers ebx and ecx; esi and edi are preserved.
	align 8
__div10:
	; div 10
	mov eax,0CCCCCCCDh; = b0
	mul esi; get a0*b0 = d1:d0
	mov ecx,edx;d1
	mov eax,0CCCCCCCDh; = b0
	xor ebx,ebx
	mul edi; get a1*b0 = e1:e0
	add ecx,eax;e0
	adc ebx,edx;e1
	mov eax,0CCCCCCCCh; =b1
	mul esi; get a0*b1 = f1:f0
	add ecx,eax;f0
	adc ebx,edx;f1
	mov ecx,0; must be mov, not xor: the carry from adc is still needed
	mov eax,0CCCCCCCCh; =b1
	adc ecx,ecx; ecx = carry out of the middle column
	mul edi; get a1*b1 = g1:g0
	add eax,ebx;g0
	adc edx,ecx;g1
	shrd eax,edx,3
	shr edx,3;;------ quotient in edx::eax
	retn

R8ToStr endp 

ps. I'll probably extend this to TByte...

Enjoy

jj2007 · September 15, 2008, 08:14:08 AM

Quote from: drizz on September 15, 2008, 02:32:32 AM
I've always wanted to write this kind of float conversion function, but figuring how to deal with floating point binary was 'pita'. Finally code that works ( and actually float string formating was just as painful ).

Great code, my talented friend! :cheekygreen:
I have added it to my testbed, see attachment (search for R8ToStr in FloatStr.asm, console assemble & link) and timings below.

Code Select

************* Timings on a Core Duo Celeron M: **********************

457 cycles for 4*float$         1.23456789012346e-07
1087 cycles for 4*R8ToStr       1.23456789012346e-7
604 cycles for 4*FloatToStr     1234.568

Code sizes and FPU register preservation:
float$          size=823, all ST regs preserved
FloatToStr      size=895, ST 6-8 trashed
R8ToStr         size=919, no ST regs used
Ray's lib       size=700, all ST regs preserved
crt sprintf     size=???, all ST regs preserved

Credits to drizz for the qwtoa algo

423 cycles for FloatToStr       1.234568e-007
448 cycles for float$ REAL4     1.23456793517107e-05
437 cycles for float$ REAL8     1.23456789012346e-07
436 cycles for float$ REAL10    0.00123456789012346
1078 cycles for R8ToStr         1.23456789012346e-7
1090 cycles for Ray's lib       0.001235
4262 cycles for sprintf         1.234568e-007

---------
602 cycles for FloatToStr       1.234568
435 cycles for float$ REAL4     1.23456788063049
420 cycles for float$ REAL8     1.23456789012346
434 cycles for float$ REAL10    1.23456789012346
966 cycles for R8ToStr          1.23456789012346
1108 cycles for Ray's lib       1.234568
4442 cycles for sprintf         1.234568

---------
603 cycles for FloatToStr       1234.568
437 cycles for float$ REAL4     1234.56787109375
419 cycles for float$ REAL8     1234.56789012346
434 cycles for float$ REAL10    1234.56789012346
908 cycles for R8ToStr          1234.56789012346
1107 cycles for Ray's lib       1234.567890
4452 cycles for sprintf         1234.568

[attachment deleted by admin]

drizz · September 15, 2008, 12:28:20 PM

Yes, the timings are not that great, but it's a start :)

Code Select

635 cycles for FloatToStr       1.234568e-007
600 cycles for float$ REAL4     1.23456793517107e-05
572 cycles for float$ REAL8     1.23456789012346e-07
545 cycles for float$ REAL10    0.00123456789012346
1177 cycles for R8ToStr         1.23456789012346e-7
940 cycles for Ray's lib        0.001235
5404 cycles for sprintf         1.234568e-007

---------
662 cycles for FloatToStr       1.234568
550 cycles for float$ REAL4     1.23456788063049
524 cycles for float$ REAL8     1.23456789012346
546 cycles for float$ REAL10    1.23456789012346
1095 cycles for R8ToStr         1.23456789012346
942 cycles for Ray's lib        1.234568
5642 cycles for sprintf         1.234568

---------
647 cycles for FloatToStr       1234.568
546 cycles for float$ REAL4     1234.56787109375
521 cycles for float$ REAL8     1234.56789012346
544 cycles for float$ REAL10    1234.56789012346
1036 cycles for R8ToStr         1234.56789012346
942 cycles for Ray's lib        1234.567890
5600 cycles for sprintf         1234.568

jj2007 · September 15, 2008, 03:05:36 PM

Quote from: drizz on September 15, 2008, 12:28:20 PM
yes timigns are not that great but it's a start :)[

A great start ;-)

There is one oddity in the third-last block:

Code Select

---------
616 cycles for FloatToStr       1234.568
448 cycles for float$ REAL4     1234.56787109375
429 cycles for float$ REAL8     1234.56789012346
446 cycles for float$ REAL10    1234.56789012346
929 cycles for R8ToStr          1234.56789012346
1110 cycles for Ray's lib       1234.567890
4556 cycles for sprintf         1234.568

---------
482 cycles for FloatToStr       1.234568e+123
465 cycles for float$ REAL4     1.23456789275539e+23
458 cycles for float$ REAL8     1.23456789012346e+123
478 cycles for float$ REAL10    1.23456789012346e+123
4315 cycles for R8ToStr <---------------------------------------------        1.23456789012346e+123
1182 cycles for Ray's lib       1.234567890123457E+0123
5928 cycles for sprintf         1.234568e+123

---------
464 cycles for FloatToStr       -1.234568e-123
477 cycles for float$ REAL4     -1.23456786887352e-23
465 cycles for float$ REAL8     -1.23456789012346e-123
473 cycles for float$ REAL10    -1.23456789012346e-123
3530 cycles for R8ToStr         -1.23456789012346e-123
1109 cycles for Ray's lib       -0.000000
6085 cycles for sprintf         -1.234568e-123

---------
11 cycles for FloatToStr        0
69 cycles for float$ REAL4      0
64 cycles for float$ REAL8      0
68 cycles for float$ REAL10     0
16 cycles for R8ToStr           0
345 cycles for Ray's lib        ERROR
694 cycles for sprintf          0

Also, the FPU lib version throws an error for the 0.0, which did not happen before... :dazzled:

jj2007 · September 15, 2008, 03:42:04 PM

Quote from: jj2007 on September 15, 2008, 03:05:36 PM
Code Select Expand
11 cycles for FloatToStr 0 69 cycles for float$ REAL4 0 64 cycles for float$ REAL8 0 68 cycles for float$ REAL10 0 16 cycles for R8ToStr 0 345 cycles for Ray's lib ERROR 694 cycles for sprintf 0
Also, the FPU lib version throws an error for the 0.0, which did not happen before... :dazzled:

Solved. I had fed a Real8 to Ray's lib :red

herge · September 17, 2008, 05:09:26 PM

Hi jj2007:

Code Select



369 cycles for 4*float$    	1.23456789012346e-07
866 cycles for 4*R8ToStr   	1.23456789012346e-7
642 cycles for 4*FloatToStr	1234.568



Code sizes and FPU register preservation:
float$   	size=823, all ST regs preserved
FloatToStr	size=895, ST 6-8 trashed
R8ToStr  	size=919, no ST regs used
Ray's lib	size=700, all ST regs preserved
crt sprintf	size=???, all ST regs preserved

------- New float$ Macro: -------------------
Divide	MyReal10	(=1.2345678e9)
by	12345678	(=1.2e7, in eax)
add	 11.1111   	(an immediate real)
Result=	111.111100000000	ok?
-- This para printed by one line of code! ---


Marketing report:
Sales were up 3.2% in 2007

Code:
print float$("\nMarketing report:\nSales were up %2f% in 2007\n",
Sales2007/Sales2006-1*100)

finit is ON	Version 1.3, 14 September 2008
Credits to drizz for the qwtoa algo

423 cycles for FloatToStr	1.234568e-007
379 cycles for float$ REAL4	1.23456793517107e-05
384 cycles for float$ REAL8	1.23456789012346e-07
378 cycles for float$ REAL10	0.00123456789012346
854 cycles for R8ToStr    	1.23456789012346e-7
1177 cycles for Ray's lib	0.001235
4050 cycles for sprintf  	1.234568e-007

---------
636 cycles for FloatToStr	1.234568
364 cycles for float$ REAL4	1.23456788063049
366 cycles for float$ REAL8	1.23456789012346
368 cycles for float$ REAL10	1.23456789012346
764 cycles for R8ToStr    	1.23456789012346
1185 cycles for Ray's lib	1.234568
4486 cycles for sprintf  	1.234568

---------
643 cycles for FloatToStr	1234.568
363 cycles for float$ REAL4	1234.56787109375
366 cycles for float$ REAL8	1234.56789012346
363 cycles for float$ REAL10	1234.56789012346
722 cycles for R8ToStr    	1234.56789012346
1245 cycles for Ray's lib	1234.567890
4358 cycles for sprintf  	1234.568

---------
454 cycles for FloatToStr	1.234568e+123
370 cycles for float$ REAL4	1.23456789275539e+23
388 cycles for float$ REAL8	1.23456789012346e+123
396 cycles for float$ REAL10	1.23456789012346e+123
3588 cycles for R8ToStr    	1.23456789012346e+123
1233 cycles for Ray's lib	1.234567890123457E+0123
5337 cycles for sprintf  	1.234568e+123

---------
442 cycles for FloatToStr	-1.234568e-123
386 cycles for float$ REAL4	-1.23456786887352e-23
392 cycles for float$ REAL8	-1.23456789012346e-123
395 cycles for float$ REAL10	-1.23456789012346e-123
2665 cycles for R8ToStr    	-1.23456789012346e-123
1168 cycles for Ray's lib	-0.000000
5781 cycles for sprintf  	-1.234568e-123

---------
8 cycles for FloatToStr	0
63 cycles for float$ REAL4	0
58 cycles for float$ REAL8	0
61 cycles for float$ REAL10	0
14 cycles for R8ToStr    	0
398 cycles for Ray's lib	ERROR
581 cycles for sprintf  	0

Regards herge

ToutEnMasm · September 18, 2008, 01:19:18 PM

Hello,
Perhaps someone can also test some functions of the strsafe.lib ?.
a sample is here
http://www.masm32.com/board/index.php?topic=8022.msg58718#msg58718

drizz · July 07, 2009, 09:07:32 PM

Update to my function for converting floats without fpu.

Now it's faster than all other :dance: (it's also bigger :lol)

Converts binary float to decimal float with two 64bit multiplications with precalculated values (If anyone is interested in Mathematica formulas (trivial) for making tables i will post them ).
Basically exponent is divided by 64, quotient is used to reduce large part, and remainder for remaining part.

X₍₁₀₎ * 2^binExp -> Y₍₁₀₎ * 10^decExp

Y₍₁₀₎ = (X₍₁₀₎ * Table1[binExp/64]) * Table2[binExp%64]
decExp = Table3[binExp/64] + Table4[binExp%64]

Y is then converted using my uint64tostr function.

X/Y is the mantissa scaled to 64bits.

check it out!

updated attachment

[attachment deleted by admin]

dedndave · July 07, 2009, 09:38:39 PM

very cool Drizz - let me play with it for a while...

jj2007 · July 07, 2009, 10:06:48 PM

Yes it's cool :U

Celeron M:

Code Select


Testing float$
        1234567890123456789 digits precision
PI      3.14159265358979323846 (there are many more digits...)
Str$    3.14159265358979324
crt     3.14159265358979310       (CRT printf or sprintf)
R8ToStr 3,14159265358979          (drizz)
FloatTo 3.141593                  (Masm32 lib FloatToStr)

Log2(e) 1.4426950408889634070
Str$    1.44269504088896341
crt     1.44269504088896340
R8ToStr 1,44269504088896          (drizz)
FloatTo 1.442695

Lg2(10) 3.3219280948873623480
Str$    3.32192809488736235
crt     3.32192809488736220
R8ToStr 3,32192809488736          (drizz)
FloatTo 3.321928

Lg10(2) 0.3010299956639811952
Str$    0.301029995663981195
crt     0.30102999566398120
R8ToStr 0,301029995663981         (drizz)
FloatTo 0.30103

Lge(2)  0.6931471805599453094
Str$    0.6931471805599453095
crt     0.69314718055994529
R8ToStr 0,693147180559945         (drizz)
FloatTo 0.6931472

Testing float$/printf
PI      3.1415926535897932384626433832795
Str$    3.14159265358979324
crt     3.14159265358979310

295     Str$('%7f', MyPI)       3.141593
474     cycles for Str$         3.14159265358979
363     cycles for R8ToStr      3,14159265358979
611     cycles for FloatToStr
4307    cycles for crt_sprintf
96      cycles for dwtoa

[attachment deleted by admin]

drizz · July 07, 2009, 11:32:27 PM

Small formatting bug found, please re-download.

ToutEnMasm · July 08, 2009, 05:26:39 AM

Quote
Intel(R) Celeron(R) CPU 2.80GHz
Microsoft Windows XP Professional Build Service Pack 3 2600

Testing float$
1234567890123456789 digits precision
PI 3.14159265358979323846 (there are many more digits...)
Str$ 3.14159265358979324
crt 3.14159265358979310 (CRT printf or sprintf)
R8ToStr 3,14159265358979 (drizz)
FloatTo 3.141593 (Masm32 lib FloatToStr)

Log2(e) 1.4426950408889634070
Str$ 1.44269504088896341
crt 1.44269504088896340
R8ToStr 1,44269504088896 (drizz)
FloatTo 1.442695

Lg2(10) 3.3219280948873623480
Str$ 3.32192809488736235
crt 3.32192809488736220
R8ToStr 3,32192809488736 (drizz)
FloatTo 3.321928

Lg10(2) 0.3010299956639811952
Str$ 0.301029995663981195
crt 0.30102999566398120
R8ToStr 0,301029995663981 (drizz)
FloatTo 0.30103

Lge(2) 0.6931471805599453094
Str$ 0.6931471805599453095
crt 0.69314718055994529
R8ToStr 0,693147180559945 (drizz)
FloatTo 0.6931472

Testing float$/printf
PI 3.1415926535897932384626433832795
Str$ 3.14159265358979324
crt 3.14159265358979310

535 Str$('%7f', MyPI) 3.141593
850 cycles for Str$ 3.14159265358979
605 cycles for R8ToStr 3,14159265358979
2208 cycles for FloatToStr
7648 cycles for crt_sprintf
152 cycles for dwtoa

Farabi · July 08, 2009, 06:19:34 AM

:dazzled:
OMG. Thats 8 times faster than MS sprintf.

Astro · August 25, 2009, 04:37:11 PM

I wish I understood this stuff (it's not the language in this case...).

Even though I can't see how it works, I understand less cycles = faster, so awesome work on that! :U

Best regards,
Astro.

drizz · August 25, 2009, 08:58:32 PM

Well i hope this helps you understand better.

bits:
SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
10000000000000000000000000000000 00000000000000000000000000000000 ; sign
01111111111100000000000000000000 00000000000000000000000000000000 ; exponent
00000000000011111111111111111111 11111111111111111111111111111111 ; fraction
00000000000100000000000000000000 00000000000000000000000000000000 ; implied bit

Memory Layout for REAL8 would look like this (little endian):

dwords:
DD FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFF
bytes:
DB FFFFFFFF,FFFFFFFF,FFFFFFFF,FFFFFFFF, FFFFFFFF,FFFFFFFF,EEEEFFFF,SEEEEEEE
bits:
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFEEEEEEEEEEES

I'm going to skip all the explanation for bias and special cases and go right to the conversion.

Assuming we have already subtracted the bias and checked for zero, infinity, etc.,

we have a number that is represented like this:

(S)   F   *   2^   E
      (2)          (2)

to convert to human base (base 10) we need to transform the number to

(S)   F'   *   10^   E'
      (2)            (2)

it requires a multiplication by certain 10^N/2^M ratio (for negative E other way around, i will write about the first case),

because (2^E/2^M)=1 (where E=M), 10^N remains (i.e. 10^E')

prior to multiplication we adjust the Fraction F to be full 64bit number by shifting.

00000000000011111111111111111111 11111111111111111111111111111111 ; fraction
00000000000100000000000000000000 00000000000000000000000000000000 ; implied bit

000000000001.11111111111111111111 11111111111111111111111111111111

first we shift bits to the left by 11 , Exponent E is decreased by 11 (the bits that occupied sign and exponent)

111111111111.11111111111111111111 11111111111111111111100000000000

now we "shift" (virtual)decimal point to the right, Exponent E is decreased by 52

11111111111111111111111111111111 11111111111111111111100000000000.

only thing that is left is multiplication of F with precomputed 10^N/2^M ratio

64Bit * 64Bit = 128Bit result

Now, we don't want all 128bits of the result, only the top 64 "precise" bits.
We account for that in the ( 10^N/2^M ) ratio by adding 64 to M, so in fact we are multiplying by
10^N/2^(64+M). (chopping off lower 64 bits is the same as (shifting right) dividing by 2^64)
Furthermore, we require the ratio to be as close to 2^64 as possible so we don't lose precision; hence the values for N will not be equidistant, and we need another table for the exponents N.

Doing this with only one table (one multiplication) for ratio and one for exponents is possible but would also take too much space.
So i split the operation to exact multiple and remainder (of 64) tables.

The rest is just rounding and formatting...

News:

Float to string without fpu