I've always wanted to write this kind of float conversion function, but figuring out how to deal with floating-point binary was a 'pita'. Finally, code that works (and actually, float string formatting was just as painful).
This is a preliminary version open for suggestions and optimizations :) and bug reports :)
; SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
; 10000000000000000000000000000000 00000000000000000000000000000000
; 01111111111100000000000000000000 00000000000000000000000000000000
; 00000000000011111111111111111111 11111111111111111111111111111111
; 100000000000000000000 00000000000000000000000000000000 ; implied
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
R8_BIAS equ 1023                        ; REAL8 exponent bias
R8_MANT equ 52                          ; number of stored fraction bits
;-----------------------------------------------------------------------
; R8ToStr - convert a REAL8 (IEEE-754 double) to a zero-terminated
;           decimal string using integer instructions only (no FPU).
;
; Syntax : MASM, 32-bit x86. No automatic prologue/epilogue; arguments
;          are addressed through esp so that ebp is free for general use.
; In     : r8       = value to convert (two dwords on the stack)
;          lpBuffer = destination buffer
; Out    : eax      = number of characters written (terminator excluded)
;          The callee removes the 12 argument bytes (ret 3*4).
; Saves  : ebp, esi, edi, ebx.  Clobbers: ecx, edx, flags.
;
; Output forms produced by the code below:
;   zero                     -> "0" (the sign of -0.0 is not emitted)
;   infinity                 -> "1.#INF"
;   negative QNaN, payload 0 -> "-1.#IND"
;   other NaNs               -> "1.#QNAN" / "1.#SNAN"
;   finite values            -> at most 15 significant digits, trailing
;                               zeros stripped, written as a plain
;                               integer (decimal exponent 0), as
;                               fixed point (decimal exponent -15..-1)
;                               or as d.ddde+N / d.ddde-N otherwise.
;   A leading '-' is written for negative non-zero values.
;
; NOTE(review): by inspection the longest output is about 22 characters
; plus the terminator; a 32-byte buffer should be ample - confirm.
;-----------------------------------------------------------------------
R8ToStr proc r8:REAL8, lpBuffer:PTR
    push ebp
    push esi
    push edi
    push ebx
    ; Frame: 32-byte digit scratch buffer followed by three dword locals.
    locals = 32+3*4
    tmpbuff equ <[esp]>
    iTrail equ <dword ptr [esp+32]>
    iExp equ <dword ptr [esp+32+4]>
    nSign equ <dword ptr [esp+32+4+4]>
    add esp,-locals
    ; Argument offsets: [esp + n-th stack dword][4 saved regs][locals];
    ; n = 1,2 are the low/high dwords of r8, n = 3 is lpBuffer.
    mov ecx,10000000000000000000000000000000b
    mov ebx,[esp+2*4][4*4][locals]      ; high dword: sign, exponent, fraction 51..32
    mov esi,[esp+1*4][4*4][locals]      ; low dword: fraction 31..0
    and ecx,ebx                         ; isolate sign bit
    and ebx,not 10000000000000000000000000000000b
    mov nSign,ecx
    mov edi,ebx
    shr ebx,20                          ; ebx = biased exponent
    and edi,00000000000011111111111111111111b ; edi = fraction 51..32
    cmp ebx,11111111111b                ; all-ones exponent: NaN or infinity
    je @@_NaN_Infinity
    mov eax,esi
    or eax,edi
    or eax,ebx
    jz @@Zero                           ; exponent and fraction all zero
    .if ebx
        ; normal number: make the implied leading 1 explicit (bit 52)
        or edi,00000000000100000000000000000000b
    .else
        ; subnormal: there is no implied bit and the value uses the
        ; exponent of the smallest normal number (biased exponent 1)
        mov ebx,1
    .endif
    sub ebx,R8_BIAS                     ; unbiased binary exponent
    xor ebp,ebp                         ; ebp = decimal exponent accumulator
    mov iExp,ebx
    ;; Invariant from here on:
    ;;   value = (edi::esi) * 2^(ebx-R8_MANT) * 10^ebp
    .if sdword ptr ebx > 63
        ; Large magnitude: left-justify the mantissa in 64 bits, then
        ; repeatedly divide by 10 and renormalise until the remaining
        ; binary exponent fits.
        shld edi,esi,63-R8_MANT
        shl esi,63-R8_MANT
        .repeat
            call __div10                ; edx::eax = edi::esi / 10 (clobbers ebx, ecx)
            ; mod 10
            lea ecx,[eax*4+eax]
            add ecx,ecx
            sub esi,ecx                 ; esi = remainder of the division
            bsr ecx,edx
            neg ecx
            add ecx,31                  ; ecx = leading zero bits in edx
            shld edx,eax,cl             ; renormalise the quotient
            shl eax,cl
            mov ebx,iExp
            sub ebx,ecx                 ; account for the left shift
            or esi,eax                  ; merge remainder into the vacated low bits
            mov edi,edx
            mov iExp,ebx
            add ebp,1                   ; one decimal digit moved into the exponent
        .until sdword ptr ebx <= 63
        mov ecx,63
        sub ecx,ebx
        shrd esi,edi,cl                 ; undo the left-justification that is left over
        shr edi,cl
    .else
        ; Exponent below 52 (fractional bits present): multiply by 5 and
        ; bump the binary exponent (x5 * 2 = x10) while lowering the
        ; decimal exponent, shifting right whenever the top nibble fills.
        .while sdword ptr ebx < R8_MANT
            .while ! (edi & 0F0000000h)
                ; Stop as soon as the binary exponent is used up.
                ; Multiplying further would make the final left shift
                ; by (ebx-R8_MANT) push bits out of the 64-bit value
                ; (this happened for exponents 50 and 51).
                .break .if sdword ptr ebx >= R8_MANT
                mov eax,esi
                mov edx,edi
                shld edi,esi,2
                shl esi,2
                add esi,eax
                adc edi,edx             ; edi::esi *= 5
                add ebx,1
                dec ebp
            .endw
            bsr ecx,edi
            sub ecx,31-4                ; bits above bit 27 of edi
            sbb edx,edx
            not edx                     ; edx = 0 if the result was negative
            and ecx,edx                 ; clamp the shift count at 0
            shrd esi,edi,cl
            shr edi,cl
            add ebx,ecx
        .endw
        lea ecx,[ebx-R8_MANT]
        shld edi,esi,cl                 ; fold remaining binary exponent into the value
        shl esi,cl
    .endif
    ; debug aid kept from the original (disabled):
    ; mov edx,nSign
    ; pushad
    ; shr edx,1
    ; sbb edx,edx
    ; and edx,'-'-'+'
    ; add edx,'+'
    ; invoke printf,T("%c%I64u.0e%i",13,10),edx,edi::esi,ebp
    ; popad
    ; edi::esi * 10^ebp now holds the value; what remains is formatting
    ;; reduce the number to at most 16 digits (compare against 2386F26FC0FFFFh)
    .while edi >= 2386F2h; LOW = 6FC0FFFF
        .break .if edi == 2386F2h && esi < 6FC0FFFFh
        call __div10
        mov esi,eax
        mov edi,edx
        add ebp,1                       ;; increase exponent
    .endw
    ;; round away the 16th digit (compare against 38D7EA4C67FFFh)
    .if edi>=38D7Eh;A4C67FFF
        .if ! (edi == 38D7Eh && esi < 0A4C67FFFh)
            add esi,5
            adc edi,0
            call __div10
            add ebp,1                   ; increase exponent
            mov esi,eax
            mov edi,edx
        .endif
    .endif
    mov iExp,ebp                        ; iExp now holds the decimal exponent
    ;; trailing zero count: divide by 10 while the remainder is zero
    xor ebp,ebp
    jmp @F
    .repeat
        mov esi,eax
        mov edi,edx
        add ebp,1
@@:     call __div10
        lea ecx,[eax*4+eax]
        neg ecx
        add ecx,ecx
        add ecx,esi                     ; ecx = value mod 10, ZF set if zero
    .until !zero?
    mov iTrail,ebp
    ;; emit the digits, least significant first, into tmpbuff;
    ;; the first pass reuses the quotient of the last division above
    xor ebp,ebp
    jmp @F
    .repeat
        call __div10
@@:     lea ecx,[eax*4+eax]
        neg ecx
        lea ecx,[ecx*2+esi+'0']         ; ASCII digit = value - 10*quotient + '0'
        mov tmpbuff[ebp],cl
        add ebp,1
        mov esi,eax
        mov edi,edx
        or eax,edx
    .until zero?
    ;; sign
    mov ecx,nSign
    mov esi,[esp+3*4][4*4][locals]      ; esi = lpBuffer
    add ecx,ecx                         ; CF = sign bit
    mov edx,'-'
    mov edi,iExp                        ; exp
    sbb ecx,ecx                         ; ecx = -1 if negative, else 0
    and edx,ecx
    mov [esi],dl                        ; '-' or 0 (overwritten when positive)
    sub esi,ecx                         ; skip the '-' if it was written
    add edi,iTrail                      ; exponent of the stripped digit string
    xchg esi,ebp                        ; ebp = output pointer, esi = digit count (flags kept)
    .if zero?                           ;; exponent is 0: plain integer
        .repeat
            mov al,tmpbuff[esi-1]
            mov [ebp],al
            add ebp,1
            sub esi,1
        .until zero?
    .elseif (sdword ptr edi >=-15 && sdword ptr edi < 0)
        ;; format without exponent
        add edi,esi                     ; edi = number of digits before the point
        .if sdword ptr edi <= 0
            mov [ebp],word ptr '.0'     ; stored in memory as "0."
            add ebp,2
            .while sdword ptr edi < 0
                mov [ebp],byte ptr '0'
                add ebp,1
                add edi,1
            .endw
            .repeat
                mov al,tmpbuff[esi-1]
                mov [ebp],al
                add ebp,1
                sub esi,1
            .until zero?
        .else
            .repeat
                mov al,tmpbuff[esi-1]
                mov [ebp],al
                add ebp,1
                sub edi,1
                .if zero?               ; integer part complete
                    mov [ebp],byte ptr '.'
                    add ebp,1
                .endif
                sub esi,1
            .until zero?
        .endif
    .else
        ;; exponent format: d[.ddd]e+N or d[.ddd]e-N
        mov al,tmpbuff[esi-1]
        mov [ebp],al
        add ebp,1
        sub esi,1
        jz @F                           ; single digit: no decimal point
        mov [ebp],byte ptr '.'
        add ebp,1
        .repeat
            mov al,tmpbuff[esi-1]
            mov [ebp],al
            add ebp,1
            add edi,1                   ; each digit after the point raises the exponent
            sub esi,1
        .until zero?
@@:
        mov [ebp],byte ptr 'e'
        add ebp,1
        mov eax,edi
        cdq
        and edx,'-'-'+'
        add edx,'+'                     ; '+' or '-' depending on the exponent sign
        mov [ebp],dl
        add ebp,1
        ; abs
        cdq
        xor eax,edx
        sub eax,edx
        ; up to three exponent digits, each obtained with a
        ; multiply-by-reciprocal division by 10
        mov edi,0CCCCCCCDh              ; magic
        mov ecx,eax
        mul edi
        shr edx,3
        lea ebx,[edx*4+edx]
        neg ebx
        lea ebx,[ebx*2+ecx+'0']         ; ebx = last digit
        mov eax,edx
        .if edx
            mov ecx,eax
            mul edi
            shr edx,3
            lea esi,[edx*4+edx]
            neg esi
            lea esi,[esi*2+ecx+'0']     ; esi = middle digit
            mov eax,edx
            .if edx
                mov ecx,eax
                mul edi
                shr edx,3
                lea eax,[edx*4+edx]
                neg eax
                lea eax,[eax*2+ecx+'0'] ; eax = leading digit
                mov [ebp],al
                add ebp,1
            .endif
            mov eax,esi
            mov [ebp],al
            add ebp,1
        .endif
        mov [ebp],bl
        add ebp,1
    .endif
@@Done:
    mov byte ptr [ebp],0                ; terminate the string
    mov eax,ebp
    sub eax,[esp+3*4][4*4][locals]      ; eax = length written
    add esp,locals
    pop ebx
    pop edi
    pop esi
    pop ebp
    ret 3*4
@@_NaN_Infinity:
    ; here edi::esi is the 52-bit fraction (no implied bit)
    mov ecx,nSign
    mov ebp,[esp+3*4][4*4][locals]
    add ecx,ecx
    mov edx,'-'
    sbb ecx,ecx                         ; ecx = -1 if negative, else 0
    and edx,ecx
    mov [ebp],dl
    sub ebp,ecx
    mov dword ptr [ebp],'#.1'           ; stored in memory as "1.#"
    mov eax,edi
    or eax,esi
    .if !eax
        mov eax,'FNI'                   ; "INF": fraction is zero
        mov [ebp+3],eax
        add ebp,6
    .elseif ecx != 0 && edi == 10000000000000000000b && esi == 0
        ; negative, only the top fraction bit set: the "indefinite" QNaN.
        ; Tested before the generic quiet test, which would otherwise
        ; swallow it.
        mov eax,'DNI'                   ; "IND"
        mov [ebp+3],eax
        add ebp,6
    .elseif edi & 10000000000000000000b
        mov eax,'NANQ'                  ; "QNAN": top fraction bit set
        mov [ebp+3],eax
        add ebp,7
    .else
        mov eax,'NANS'                  ; "SNAN": top fraction bit clear
        mov [ebp+3],eax
        add ebp,7
    .endif
    jmp @@Done
@@Zero:
    mov ebp,[esp+3*4][4*4][locals]
    mov byte ptr [ebp],'0'
    add ebp,1
    jmp @@Done
;; __div10: unsigned 64-bit division by 10 via reciprocal multiplication
;; in : edi::esi = dividend
;; out: edx::eax = quotient
;; clobbers ebx, ecx, flags; edi and esi are preserved
;; Computes the upper 64 bits of dividend * 0CCCCCCCCCCCCCCCDh, then
;; shifts right by 3.
    align 8
__div10:
    ; div 10
    mov eax,0CCCCCCCDh                  ; = b0
    mul esi                             ; get a0*b0 = d1:d0
    mov ecx,edx                         ; d1
    mov eax,0CCCCCCCDh                  ; = b0
    xor ebx,ebx
    mul edi                             ; get a1*b0 = e1:e0
    add ecx,eax                         ; e0
    adc ebx,edx                         ; e1
    mov eax,0CCCCCCCCh                  ; = b1
    mul esi                             ; get a0*b1 = f1:f0
    add ecx,eax                         ; f0
    adc ebx,edx                         ; f1
    mov ecx,0                           ; must be mov, not xor: the carry from adc is still needed
    mov eax,0CCCCCCCCh                  ; = b1
    adc ecx,ecx                         ; ecx = carry out of the middle sum
    mul edi                             ; get a1*b1 = g1:g0
    add eax,ebx                         ; g0
    adc edx,ecx                         ; g1
    shrd eax,edx,3
    shr edx,3                           ;;------ quotient in edx::eax
    retn
R8ToStr endp
ps. I'll probably extend this to TByte...
Enjoy
Quote from: drizz on September 15, 2008, 02:32:32 AM
I've always wanted to write this kind of float conversion function, but figuring how to deal with floating point binary was 'pita'. Finally code that works ( and actually float string formating was just as painful ).
Great code, my talented friend! :cheekygreen:
I have added it to my testbed, see attachment (search for R8ToStr in FloatStr.asm, console assemble & link) and timings below.
************* Timings on a Core Duo Celeron M: **********************
457 cycles for 4*float$ 1.23456789012346e-07
1087 cycles for 4*R8ToStr 1.23456789012346e-7
604 cycles for 4*FloatToStr 1234.568
Code sizes and FPU register preservation:
float$ size=823, all ST regs preserved
FloatToStr size=895, ST 6-8 trashed
R8ToStr size=919, no ST regs used
Ray's lib size=700, all ST regs preserved
crt sprintf size=???, all ST regs preserved
Credits to drizz for the qwtoa algo
423 cycles for FloatToStr 1.234568e-007
448 cycles for float$ REAL4 1.23456793517107e-05
437 cycles for float$ REAL8 1.23456789012346e-07
436 cycles for float$ REAL10 0.00123456789012346
1078 cycles for R8ToStr 1.23456789012346e-7
1090 cycles for Ray's lib 0.001235
4262 cycles for sprintf 1.234568e-007
---------
602 cycles for FloatToStr 1.234568
435 cycles for float$ REAL4 1.23456788063049
420 cycles for float$ REAL8 1.23456789012346
434 cycles for float$ REAL10 1.23456789012346
966 cycles for R8ToStr 1.23456789012346
1108 cycles for Ray's lib 1.234568
4442 cycles for sprintf 1.234568
---------
603 cycles for FloatToStr 1234.568
437 cycles for float$ REAL4 1234.56787109375
419 cycles for float$ REAL8 1234.56789012346
434 cycles for float$ REAL10 1234.56789012346
908 cycles for R8ToStr 1234.56789012346
1107 cycles for Ray's lib 1234.567890
4452 cycles for sprintf 1234.568
[attachment deleted by admin]
Yes, the timings are not that great, but it's a start :)
635 cycles for FloatToStr 1.234568e-007
600 cycles for float$ REAL4 1.23456793517107e-05
572 cycles for float$ REAL8 1.23456789012346e-07
545 cycles for float$ REAL10 0.00123456789012346
1177 cycles for R8ToStr 1.23456789012346e-7
940 cycles for Ray's lib 0.001235
5404 cycles for sprintf 1.234568e-007
---------
662 cycles for FloatToStr 1.234568
550 cycles for float$ REAL4 1.23456788063049
524 cycles for float$ REAL8 1.23456789012346
546 cycles for float$ REAL10 1.23456789012346
1095 cycles for R8ToStr 1.23456789012346
942 cycles for Ray's lib 1.234568
5642 cycles for sprintf 1.234568
---------
647 cycles for FloatToStr 1234.568
546 cycles for float$ REAL4 1234.56787109375
521 cycles for float$ REAL8 1234.56789012346
544 cycles for float$ REAL10 1234.56789012346
1036 cycles for R8ToStr 1234.56789012346
942 cycles for Ray's lib 1234.567890
5600 cycles for sprintf 1234.568
Quote from: drizz on September 15, 2008, 12:28:20 PM
yes timings are not that great but it's a start :)
A great start ;-)
There is one oddity in the third-last block:
---------
616 cycles for FloatToStr 1234.568
448 cycles for float$ REAL4 1234.56787109375
429 cycles for float$ REAL8 1234.56789012346
446 cycles for float$ REAL10 1234.56789012346
929 cycles for R8ToStr 1234.56789012346
1110 cycles for Ray's lib 1234.567890
4556 cycles for sprintf 1234.568
---------
482 cycles for FloatToStr 1.234568e+123
465 cycles for float$ REAL4 1.23456789275539e+23
458 cycles for float$ REAL8 1.23456789012346e+123
478 cycles for float$ REAL10 1.23456789012346e+123
4315 cycles for R8ToStr <--------------------------------------------- 1.23456789012346e+123
1182 cycles for Ray's lib 1.234567890123457E+0123
5928 cycles for sprintf 1.234568e+123
---------
464 cycles for FloatToStr -1.234568e-123
477 cycles for float$ REAL4 -1.23456786887352e-23
465 cycles for float$ REAL8 -1.23456789012346e-123
473 cycles for float$ REAL10 -1.23456789012346e-123
3530 cycles for R8ToStr -1.23456789012346e-123
1109 cycles for Ray's lib -0.000000
6085 cycles for sprintf -1.234568e-123
---------
11 cycles for FloatToStr 0
69 cycles for float$ REAL4 0
64 cycles for float$ REAL8 0
68 cycles for float$ REAL10 0
16 cycles for R8ToStr 0
345 cycles for Ray's lib ERROR
694 cycles for sprintf 0
Also, the FPU lib version throws an error for the 0.0, which did not happen before... :dazzled:
Quote from: jj2007 on September 15, 2008, 03:05:36 PM
11 cycles for FloatToStr 0
69 cycles for float$ REAL4 0
64 cycles for float$ REAL8 0
68 cycles for float$ REAL10 0
16 cycles for R8ToStr 0
345 cycles for Ray's lib ERROR
694 cycles for sprintf 0
Also, the FPU lib version throws an error for the 0.0, which did not happen before... :dazzled:
Solved. I had fed a Real8 to Ray's lib :red
Hi jj2007:
369 cycles for 4*float$ 1.23456789012346e-07
866 cycles for 4*R8ToStr 1.23456789012346e-7
642 cycles for 4*FloatToStr 1234.568
Code sizes and FPU register preservation:
float$ size=823, all ST regs preserved
FloatToStr size=895, ST 6-8 trashed
R8ToStr size=919, no ST regs used
Ray's lib size=700, all ST regs preserved
crt sprintf size=???, all ST regs preserved
------- New float$ Macro: -------------------
Divide MyReal10 (=1.2345678e9)
by 12345678 (=1.2e7, in eax)
add 11.1111 (an immediate real)
Result= 111.111100000000 ok?
-- This para printed by one line of code! ---
Marketing report:
Sales were up 3.2% in 2007
Code:
print float$("\nMarketing report:\nSales were up %2f% in 2007\n",
Sales2007/Sales2006-1*100)
finit is ON Version 1.3, 14 September 2008
Credits to drizz for the qwtoa algo
423 cycles for FloatToStr 1.234568e-007
379 cycles for float$ REAL4 1.23456793517107e-05
384 cycles for float$ REAL8 1.23456789012346e-07
378 cycles for float$ REAL10 0.00123456789012346
854 cycles for R8ToStr 1.23456789012346e-7
1177 cycles for Ray's lib 0.001235
4050 cycles for sprintf 1.234568e-007
---------
636 cycles for FloatToStr 1.234568
364 cycles for float$ REAL4 1.23456788063049
366 cycles for float$ REAL8 1.23456789012346
368 cycles for float$ REAL10 1.23456789012346
764 cycles for R8ToStr 1.23456789012346
1185 cycles for Ray's lib 1.234568
4486 cycles for sprintf 1.234568
---------
643 cycles for FloatToStr 1234.568
363 cycles for float$ REAL4 1234.56787109375
366 cycles for float$ REAL8 1234.56789012346
363 cycles for float$ REAL10 1234.56789012346
722 cycles for R8ToStr 1234.56789012346
1245 cycles for Ray's lib 1234.567890
4358 cycles for sprintf 1234.568
---------
454 cycles for FloatToStr 1.234568e+123
370 cycles for float$ REAL4 1.23456789275539e+23
388 cycles for float$ REAL8 1.23456789012346e+123
396 cycles for float$ REAL10 1.23456789012346e+123
3588 cycles for R8ToStr 1.23456789012346e+123
1233 cycles for Ray's lib 1.234567890123457E+0123
5337 cycles for sprintf 1.234568e+123
---------
442 cycles for FloatToStr -1.234568e-123
386 cycles for float$ REAL4 -1.23456786887352e-23
392 cycles for float$ REAL8 -1.23456789012346e-123
395 cycles for float$ REAL10 -1.23456789012346e-123
2665 cycles for R8ToStr -1.23456789012346e-123
1168 cycles for Ray's lib -0.000000
5781 cycles for sprintf -1.234568e-123
---------
8 cycles for FloatToStr 0
63 cycles for float$ REAL4 0
58 cycles for float$ REAL8 0
61 cycles for float$ REAL10 0
14 cycles for R8ToStr 0
398 cycles for Ray's lib ERROR
581 cycles for sprintf 0
Regards herge
Hello,
Perhaps someone can also test some functions of the strsafe.lib ?.
a sample is here
http://www.masm32.com/board/index.php?topic=8022.msg58718#msg58718
Update to my function for converting floats without fpu.
Now it's faster than all other :dance: (it's also bigger :lol)
Converts binary float to decimal float with two 64bit multiplications with precalculated values (If anyone is interested in Mathematica formulas (trivial) for making tables i will post them ).
Basically exponent is divided by 64, quotient is used to reduce large part, and remainder for remaining part.
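$X_{(10)} \cdot 2^{binExp} \rightarrow Y_{(10)} \cdot 10^{decExp}$
$Y_{(10)} = \left(X_{(10)} \cdot \mathrm{Table1}[binExp / 64]\right) \cdot \mathrm{Table2}[binExp \bmod 64]$
$decExp = \mathrm{Table3}[binExp / 64] + \mathrm{Table4}[binExp \bmod 64]$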
Y is then converted using my uint64tostr function.
X/Y is the mantissa scaled to 64bits.
check it out!
updated attachment
[attachment deleted by admin]
very cool Drizz - let me play with it for a while...
Yes it's cool :U
Celeron M:
Testing float$
1234567890123456789 digits precision
PI 3.14159265358979323846 (there are many more digits...)
Str$ 3.14159265358979324
crt 3.14159265358979310 (CRT printf or sprintf)
R8ToStr 3,14159265358979 (drizz)
FloatTo 3.141593 (Masm32 lib FloatToStr)
Log2(e) 1.4426950408889634070
Str$ 1.44269504088896341
crt 1.44269504088896340
R8ToStr 1,44269504088896 (drizz)
FloatTo 1.442695
Lg2(10) 3.3219280948873623480
Str$ 3.32192809488736235
crt 3.32192809488736220
R8ToStr 3,32192809488736 (drizz)
FloatTo 3.321928
Lg10(2) 0.3010299956639811952
Str$ 0.301029995663981195
crt 0.30102999566398120
R8ToStr 0,301029995663981 (drizz)
FloatTo 0.30103
Lge(2) 0.6931471805599453094
Str$ 0.6931471805599453095
crt 0.69314718055994529
R8ToStr 0,693147180559945 (drizz)
FloatTo 0.6931472
Testing float$/printf
PI 3.1415926535897932384626433832795
Str$ 3.14159265358979324
crt 3.14159265358979310
295 Str$('%7f', MyPI) 3.141593
474 cycles for Str$ 3.14159265358979
363 cycles for R8ToStr 3,14159265358979
611 cycles for FloatToStr
4307 cycles for crt_sprintf
96 cycles for dwtoa
[attachment deleted by admin]
Small formatting bug found, please re-download.
Quote
Intel(R) Celeron(R) CPU 2.80GHz
Microsoft Windows XP Professional Build Service Pack 3 2600
Testing float$
1234567890123456789 digits precision
PI 3.14159265358979323846 (there are many more digits...)
Str$ 3.14159265358979324
crt 3.14159265358979310 (CRT printf or sprintf)
R8ToStr 3,14159265358979 (drizz)
FloatTo 3.141593 (Masm32 lib FloatToStr)
Log2(e) 1.4426950408889634070
Str$ 1.44269504088896341
crt 1.44269504088896340
R8ToStr 1,44269504088896 (drizz)
FloatTo 1.442695
Lg2(10) 3.3219280948873623480
Str$ 3.32192809488736235
crt 3.32192809488736220
R8ToStr 3,32192809488736 (drizz)
FloatTo 3.321928
Lg10(2) 0.3010299956639811952
Str$ 0.301029995663981195
crt 0.30102999566398120
R8ToStr 0,301029995663981 (drizz)
FloatTo 0.30103
Lge(2) 0.6931471805599453094
Str$ 0.6931471805599453095
crt 0.69314718055994529
R8ToStr 0,693147180559945 (drizz)
FloatTo 0.6931472
Testing float$/printf
PI 3.1415926535897932384626433832795
Str$ 3.14159265358979324
crt 3.14159265358979310
535 Str$('%7f', MyPI) 3.141593
850 cycles for Str$ 3.14159265358979
605 cycles for R8ToStr 3,14159265358979
2208 cycles for FloatToStr
7648 cycles for crt_sprintf
152 cycles for dwtoa
:dazzled:
OMG. Thats 8 times faster than MS sprintf.
I wish I understood this stuff (it's not the language in this case...).
Even though I can't see how it works, I understand less cycles = faster, so awesome work on that! :U
Best regards,
Astro.
Well i hope this helps you understand better.
bits:
SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
10000000000000000000000000000000 00000000000000000000000000000000 ; sign
01111111111100000000000000000000 00000000000000000000000000000000 ; exponent
00000000000011111111111111111111 11111111111111111111111111111111 ; fraction
00000000000100000000000000000000 00000000000000000000000000000000 ; implied bit
Memory Layout for REAL8 would look like this (little endian):
dwords:
DD FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,SEEEEEEEEEEEFFFFFFFFFFFFFFFFFFFF
bytes:
DB FFFFFFFF,FFFFFFFF,FFFFFFFF,FFFFFFFF, FFFFFFFF,FFFFFFFF,EEEEFFFF,SEEEEEEE
bits:
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFEEEEEEEEEEES
I'm going to skip all the explanation for bias and special cases and go right to the conversion.
Assuming we have already subtracted the bias and checked for zero, infinity, etc.,
we have a number that is represented like this:
(S) F * 2^ E
(2) (2)
to convert to human base (base 10) we need to transform the number to
(S) F' * 10^ E'
(2) (2)
it requires a multiplication by certain 10^N/2^M ratio (for negative E other way around, i will write about the first case),
because (2^E/2^M)=1 (where E=M), 10^N remains (i.e. 10^E')
prior to multiplication we adjust the Fraction F to be full 64bit number by shifting.
00000000000011111111111111111111 11111111111111111111111111111111 ; fraction
00000000000100000000000000000000 00000000000000000000000000000000 ; implied bit
000000000001.11111111111111111111 11111111111111111111111111111111
first we shift bits to the left by 11 , Exponent E is decreased by 11 (the bits that occupied sign and exponent)
111111111111.11111111111111111111 11111111111111111111100000000000
now we "shift" (virtual)decimal point to the right, Exponent E is decreased by 52
11111111111111111111111111111111 11111111111111111111100000000000.
only thing that is left is multiplication of F with precomputed 10^N/2^M ratio
64Bit * 64Bit = 128Bit result
Now, we don't want all 128bits of the result, only the top 64 "precise" bits.
We account for that in the ( 10^N/2^M ) ratio by adding 64 to M, so in fact we are multiplying by
10^N/2^(64+M). (chopping off lower 64 bits is the same as (shifting right) dividing by 2^64)
Furthermore, we require the ratio to be as close to 2^64 as possible so we don't lose precision; hence the values for N will not be equidistant, and we need another table for the exponents N.
Doing this with only one table (one multiplication) for ratio and one for exponents is possible but would also take too much space.
So i split the operation to exact multiple and remainder (of 64) tables.
The rest is just rounding and formatting...
Excuse my ignorance: Would the same be possible for a REAL10?? Conversion from R4/R8 to R10 is easy...
.data
; The same constant declared at three precisions, plus a REAL10 scratch slot.
MyR4 REAL4 123.456
MyR8 REAL8 123.456
MyR10 REAL10 123.456
MyTmp10 REAL10 0.0
.code
; Each group loads one of the values onto the FPU stack and stores it
; back into MyTmp10 as a REAL10 before calling the converter.
; NOTE(review): drizzconvert is not defined in this listing - presumably a
; placeholder for a REAL10 conversion routine that reads MyTmp10; confirm.
fld MyR4
fstp MyTmp10
call drizzconvert
fld MyR8
fstp MyTmp10
call drizzconvert
fld MyR10
fstp MyTmp10
call drizzconvert
I did plan to code REAL10 version too.
I would just need to enlarge the tables; since the fraction is not greater than 64 bits, everything else stays the same (in a nutshell).
I am looking forward to that :U
Speedwise it looks very promising. Perhaps you can generate the tables once in the uninitialised data section? That's what Dave and I did, using the FPU.
numerical computation guide:
http://dlc.sun.com/pdf/819-3693/819-3693.pdf
80-bit real10 format (intel extended reals)
(http://img34.imageshack.us/img34/5912/extreal.gif)
notice that Signaling NaN's require at least one of the lower 63 bits to be set
this is not so for Quiet NaN's
Quiet NaN's and Signaling NaN's may have the sign bit set to 1
the value: ffff c0000000 00000000 is an "Indefinite"
a special case of the set of Quiet NaN values
all other values are "Invalid"
Great explanation! Thankyou! :thumbu :8)
Best regards,
Astro.
Here it is!
Now, it isn't finished yet, this is just a test version that has 18-digit precision.
I couldn't make it work for 19 digits (yet).
I would probably need to recheck the tables or add some special handling to support all 19 digits.
play around and see if you can find some bugs.
:8)
Looks advanced!
I just have a question about:
mov ebx,[esp+2*4][4*4][locals]
How in the world is this legal?! :dazzled:
lol
it's just assembler syntax
it makes it easier to understand where the values came from
"2*4", "4*4", and "locals" are all essentially constants that get added together
if he just put the number in there, it may not be as easy to figure out how he arrived at that constant
Quote from: 2-Bit Chip on August 30, 2009, 07:52:36 AMmov ebx,[esp+2*4][4*4][locals]
parameters are accessed through stack to free ebp for other use.
it means:
[esp+second argument][size of registers pushed ][size of local variables]
2*4,4*4;; 4 means size of DWORD
2*DWORD,4*DWORD
(second argument actually in this case means higher dword of r8 since real8 is 8 bytes)
concatenated brackets in masm mean "plus"
mov eax,[edx][0][0][0][0][0][0][0][0]
will work just fine
Quote from: drizz on August 26, 2009, 05:24:56 PM
I would probably need to recheck the tables or add some special handling to support all 19 digits.
play around and see if you can find some bugs.
Looks fine and is bloody fast, compliments :U
The only open question is if you could not
generate the table in the .data? section, in order to avoid that bloat feeling ::)
3,14159265358979324
3.141592653589793238
369 drizz
630 Str$ JJ