I needed some floating-point comparison routines, so I wrote these macros. They work with REAL4, REAL8 and REAL10 memory variables. They seem to be working correctly. They return 1 if true, 0 if false and -1 on error.
; Floating-point comparison macros
; Greg Lyon - 2005
;
; Thanks to Raymond Filiatreault for
; the excellent tutorial 'Simply FPU'.
; It made writing these much easier.
;
.586
.MODEL FLAT, stdcall
option casemap:none
include windows.inc
include kernel32.inc
include user32.inc
include masm32.inc
include c:\masm32\macros\macros.asm
includelib kernel32.lib
includelib user32.lib
includelib masm32.lib
;------------------------------------------------------------
;------------------------------------------------------------
; isgreater(r1, r2) - macro function answering "is r1 > r2 ?"
;   r1, r2 : floating-point memory operands (used with REAL4, REAL8, REAL10)
;   Result : expands to EAX, which holds 1 = true, 0 = false,
;            -1 = the operands could not be ordered (FPU reported "unordered")
;   Effects: FINIT resets the FPU, so the caller's FPU stack is discarded;
;            EAX and the CPU flags are overwritten.
isgreater MACRO r1:req, r2:req
    LOCAL done
    finit
    fld     r2
    fld     r1                  ;; st(0) = r1, st(1) = r2
    fcompp                      ;; compare st(0) with st(1), then pop both
    fstsw   ax
    fwait
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    mov     eax, -1             ;; MOV leaves the flags untouched
    jp      done                ;; PF set: unordered
    mov     eax, 1
    ja      done                ;; CF = 0 and ZF = 0: r1 > r2
    xor     eax, eax            ;; everything else: not greater
done:
    EXITM <eax>
ENDM
;------------------------------------------------------------
; isgreaterequal(r1, r2) - macro function answering "is r1 >= r2 ?"
;   r1, r2 : floating-point memory operands (used with REAL4, REAL8, REAL10)
;   Result : expands to EAX, which holds 1 = true, 0 = false,
;            -1 = the operands could not be ordered (FPU reported "unordered")
;   Effects: FINIT resets the FPU, so the caller's FPU stack is discarded;
;            EAX and the CPU flags are overwritten.
isgreaterequal MACRO r1:req, r2:req
    LOCAL done
    finit
    fld     r2
    fld     r1                  ;; st(0) = r1, st(1) = r2
    fcompp                      ;; compare st(0) with st(1), then pop both
    fstsw   ax
    fwait
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    mov     eax, -1             ;; MOV leaves the flags untouched
    jp      done                ;; PF set: unordered
    mov     eax, 1
    jae     done                ;; CF = 0: r1 > r2 or r1 = r2
    xor     eax, eax            ;; CF = 1: r1 < r2
done:
    EXITM <eax>
ENDM
;;------------------------------------------------------------
; isless(r1, r2) - macro function answering "is r1 < r2 ?"
;   r1, r2 : floating-point memory operands (used with REAL4, REAL8, REAL10)
;   Result : expands to EAX, which holds 1 = true, 0 = false,
;            -1 = the operands could not be ordered (FPU reported "unordered")
;   Effects: FINIT resets the FPU, so the caller's FPU stack is discarded;
;            EAX and the CPU flags are overwritten.
isless MACRO r1:req, r2:req
    LOCAL done
    finit
    fld     r2
    fld     r1                  ;; st(0) = r1, st(1) = r2
    fcompp                      ;; compare st(0) with st(1), then pop both
    fstsw   ax
    fwait
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    mov     eax, -1             ;; MOV leaves the flags untouched
    jp      done                ;; PF set: unordered
    mov     eax, 1
    jb      done                ;; CF = 1: r1 < r2
    xor     eax, eax            ;; r1 > r2 or r1 = r2
done:
    EXITM <eax>
ENDM
;;------------------------------------------------------------
; islessequal(r1, r2) - macro function answering "is r1 <= r2 ?"
;   r1, r2 : floating-point memory operands (used with REAL4, REAL8, REAL10)
;   Result : expands to EAX, which holds 1 = true, 0 = false,
;            -1 = the operands could not be ordered (FPU reported "unordered")
;   Effects: FINIT resets the FPU, so the caller's FPU stack is discarded;
;            EAX and the CPU flags are overwritten.
islessequal MACRO r1:req, r2:req
    LOCAL done
    finit
    fld     r2
    fld     r1                  ;; st(0) = r1, st(1) = r2
    fcompp                      ;; compare st(0) with st(1), then pop both
    fstsw   ax
    fwait
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    mov     eax, -1             ;; MOV leaves the flags untouched
    jp      done                ;; PF set: unordered
    mov     eax, 1
    jbe     done                ;; CF = 1 or ZF = 1: r1 < r2 or r1 = r2
    xor     eax, eax            ;; r1 > r2
done:
    EXITM <eax>
ENDM
;;------------------------------------------------------------
; islessgreater(r1, r2) - macro function answering "is r1 < r2 or r1 > r2 ?"
;   r1, r2 : floating-point memory operands (used with REAL4, REAL8, REAL10)
;   Result : expands to EAX, which holds 1 = true, 0 = false (equal),
;            -1 = the operands could not be ordered (FPU reported "unordered")
;   Effects: FINIT resets the FPU, so the caller's FPU stack is discarded;
;            EAX and the CPU flags are overwritten.
islessgreater MACRO r1:req, r2:req
    LOCAL done
    finit
    fld     r2
    fld     r1                  ;; st(0) = r1, st(1) = r2
    fcompp                      ;; compare st(0) with st(1), then pop both
    fstsw   ax
    fwait
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    mov     eax, -1             ;; MOV leaves the flags untouched
    jp      done                ;; PF set: unordered
    mov     eax, 1
    jne     done                ;; ZF = 0: r1 < r2 or r1 > r2
    xor     eax, eax            ;; ZF = 1: r1 = r2
done:
    EXITM <eax>
ENDM
;;------------------------------------------------------------
; isequal(r1, r2) - macro function answering "is r1 = r2 ?"
;   r1, r2 : floating-point memory operands (used with REAL4, REAL8, REAL10)
;   Result : expands to EAX, which holds 1 = true, 0 = false,
;            -1 = the operands could not be ordered (FPU reported "unordered")
;   Effects: FINIT resets the FPU, so the caller's FPU stack is discarded;
;            EAX and the CPU flags are overwritten.
isequal MACRO r1:req, r2:req
    LOCAL done
    finit
    fld     r2
    fld     r1                  ;; st(0) = r1, st(1) = r2
    fcompp                      ;; compare st(0) with st(1), then pop both
    fstsw   ax
    fwait
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    mov     eax, -1             ;; MOV leaves the flags untouched
    jp      done                ;; PF set: unordered
    mov     eax, 1
    je      done                ;; ZF = 1: r1 = r2
    xor     eax, eax            ;; r1 < r2 or r1 > r2
done:
    EXITM <eax>
ENDM
;------------------------------------------------------------
; isnotequal(r1, r2) - macro function that simply expands to
; islessgreater(r1, r2); the operands are passed through unchanged, so the
; result is whatever islessgreater produces for them.
isnotequal MACRO r1:req, r2:req
EXITM <islessgreater(r1, r2)>
ENDM
;------------------------------------------------------------
; isapproxequal(r1, r2, tolerance) - macro function answering
; "is |r1 - r2| <= tolerance ?"
;   r1, r2, tolerance : floating-point memory operands
;                       (used with REAL4, REAL8, REAL10)
;   Result : expands to EAX, which holds 1 = true, 0 = false,
;            -1 = the values could not be ordered (FPU reported "unordered")
;   Effects: FINIT resets the FPU, so the caller's FPU stack is discarded;
;            EAX and the CPU flags are overwritten.
;
; FSUBP st(1), st(0) computes st(1) - st(0).  The previous version loaded
; the larger operand last, so it always produced (smaller - larger), a
; value <= 0, and the "<= tolerance" test passed for any non-negative
; tolerance no matter how far apart the operands were.  Taking FABS of the
; difference removes the dependence on operand order.
;
; Note: when the true difference sits exactly on the tolerance (for example
; 5.00 vs 4.99 with tolerance 0.01), the answer depends on how the operands
; round in their storage format.
isapproxequal MACRO r1:req, r2:req, tolerance:req
    LOCAL done
    finit
    fld     tolerance
    fld     r2
    fld     r1                  ;; st(0) = r1, st(1) = r2, st(2) = tolerance
    fsubp   st(1), st(0)        ;; st(0) = r2 - r1, st(1) = tolerance
    fabs                        ;; st(0) = |r2 - r1|
    fcompp                      ;; compare |r2 - r1| with tolerance, pop both
    fstsw   ax
    fwait
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    mov     eax, -1             ;; MOV leaves the flags untouched
    jp      done                ;; PF set: unordered
    mov     eax, 1
    jbe     done                ;; |r2 - r1| <= tolerance
    xor     eax, eax            ;; difference exceeds the tolerance
done:
    EXITM <eax>
ENDM
;------------------------------------------------------------
;------------------------------------------------------------
; Demo driver: prints every comparison expression followed by
; True / False / Error, once each for REAL4, REAL8 and REAL10.
;------------------------------------------------------------

; Prints the text matching the tri-state value currently in EAX:
; -1 -> szError, any other non-zero value -> szTrue, 0 -> szFalse.
ShowVerdict MACRO
    .if SDWORD PTR eax == -1
        print ADDR szError
    .elseif eax
        print ADDR szTrue
    .else
        print ADDR szFalse
    .endif
ENDM

.DATA
szTrue  BYTE "True ", 10, 0
szFalse BYTE "False", 10, 0
szError BYTE "Error", 10, 0

.CODE
start:
    ; ---- isgreater ----
    print SADD("isgreater(FP4(5.00), FP4(4.99)) = ")
    mov eax, isgreater(FP4(5.00), FP4(4.99))
    ShowVerdict
    print SADD("isgreater(FP8(5.00), FP8(4.99)) = ")
    mov eax, isgreater(FP8(5.00), FP8(4.99))
    ShowVerdict
    print SADD("isgreater(FP10(5.00), FP10(4.99)) = ")
    mov eax, isgreater(FP10(5.00), FP10(4.99))
    ShowVerdict
    print SADD(10)

    ; ---- isless ----
    print SADD("isless(FP4(5.00), FP4(4.99)) = ")
    mov eax, isless(FP4(5.00), FP4(4.99))
    ShowVerdict
    print SADD("isless(FP8(5.00), FP8(4.99)) = ")
    mov eax, isless(FP8(5.00), FP8(4.99))
    ShowVerdict
    print SADD("isless(FP10(5.00), FP10(4.99)) = ")
    mov eax, isless(FP10(5.00), FP10(4.99))
    ShowVerdict
    print SADD(10)

    ; ---- isequal ----
    print SADD("isequal(FP4(5.00), FP4(4.99)) = ")
    mov eax, isequal(FP4(5.00), FP4(4.99))
    ShowVerdict
    print SADD("isequal(FP8(5.00), FP8(4.99)) = ")
    mov eax, isequal(FP8(5.00), FP8(4.99))
    ShowVerdict
    print SADD("isequal(FP10(5.00), FP10(4.99)) = ")
    mov eax, isequal(FP10(5.00), FP10(4.99))
    ShowVerdict
    print SADD(10)

    ; ---- isnotequal ----
    print SADD("isnotequal(FP4(5.00), FP4(4.99)) = ")
    mov eax, isnotequal(FP4(5.00), FP4(4.99))
    ShowVerdict
    print SADD("isnotequal(FP8(5.00), FP8(4.99)) = ")
    mov eax, isnotequal(FP8(5.00), FP8(4.99))
    ShowVerdict
    print SADD("isnotequal(FP10(5.00), FP10(4.99)) = ")
    mov eax, isnotequal(FP10(5.00), FP10(4.99))
    ShowVerdict
    print SADD(10)

    ; ---- isapproxequal, smaller operand first ----
    print SADD("isapproxequal(FP4(4.99), FP4(5.00), FP4(0.01)) = ")
    mov eax, isapproxequal(FP4(4.99), FP4(5.00), FP4(0.01))
    ShowVerdict
    print SADD("isapproxequal(FP8(4.99), FP8(5.00), FP8(0.01)) = ")
    mov eax, isapproxequal(FP8(4.99), FP8(5.00), FP8(0.01))
    ShowVerdict
    print SADD("isapproxequal(FP10(4.99), FP10(5.00), FP10(0.01)) = ")
    mov eax, isapproxequal(FP10(4.99), FP10(5.00), FP10(0.01))
    ShowVerdict
    print SADD(10)

    ; ---- isapproxequal, larger operand first ----
    print SADD("isapproxequal(FP4(5.00), FP4(4.99), FP4(0.01)) = ")
    mov eax, isapproxequal(FP4(5.00), FP4(4.99), FP4(0.01))
    ShowVerdict
    print SADD("isapproxequal(FP8(5.00), FP8(4.99), FP8(0.01)) = ")
    mov eax, isapproxequal(FP8(5.00), FP8(4.99), FP8(0.01))
    ShowVerdict
    print SADD("isapproxequal(FP10(5.00), FP10(4.99), FP10(0.01)) = ")
    mov eax, isapproxequal(FP10(5.00), FP10(4.99), FP10(0.01))
    ShowVerdict
    print SADD(10)

    invoke ExitProcess, 0
end start
I don't quite understand this fascination with macros. These are perfectly good routines, so why not make them callable and then call them? The cycle savings are marginal given the blow-up in code size, and its unnecessary effect on the L1 code cache, once a decent-sized application gets hold of these macros.
Also, with all those finits, fwaits, and sahf's, the code is slow. Floating point comparisons can often be performed faster using integer instructions instead. For example, here is a routine to return greater=1, equal=0, less=-1:
__declspec( naked ) Integer fpcmp (double fpv, double fpw) {
_asm {
mov ecx, [esp+8] ; load left upper mantissa and exponent into ecx
mov edx, [esp+16] ; load right upper mantissa and exponent into edx
xor ecx, 80000000h ; flip signs
xor edx, 80000000h
cmp ecx, edx
je topeq ; branch if equal
noteq: sbb eax, eax ; eax = 0 for greater, -1 for less
add eax, eax ; eax = 0, -2
add eax, 1 ; return 1=greater, -1=less
ret
topeq: mov eax, [esp + 4]
sub eax, [esp + 12]
jne noteq
ret
}
}
Finally, the handwriting is on the wall--the FPU's days are numbered. Future cpus will have only SSE, and no more MMX or FPU. Get used to writing code that doesn't rely on NaN's and Infinities, as the standard approach by numerical software is to prevent such situations from occurring in the first place. Intel FPU architecture is antiquated and slow, so don't fall too in love with it...
Nice exercise Greg.
Generally, a float needs to be compared immediately after it has been computed. It would thus be more efficient to use its value while it is already on the FPU instead of storing it in some memory variable and then bring it back on the FPU for comparison.
Some of your code could also be simplified. For example, in the isapproxequal code, you don't care which base float is greater but you're only concerned if the absolute difference between them exceeds a given value. Thus, disregard comparing the two base floats; simply subtract one from the other and use the fabs instruction.
Raymond
Raymond,
Thanks for the suggestions. I wasn't really concerned about speed with these; I just wanted something that worked. After I wrote procedure versions of them, I converted them to macros. I modeled them after the C99 macros that have the same names, the same arguments and the same return values. I looked at your FpuComp procedure, but I wanted something that would stand on its own. I am going to take your suggestion and make the change to use fabs. :thumbu
Codewarp,
Hey hotshot. I do have a fascination with macros. I did write procedure versions of these. And I wasn't really concerned about speed.
If you use procedures you need a separate set of procedures for each data type. These macros will accept all three types.
I am well aware of the future direction of floating-point, but I think you are being a bit premature about not using the FPU and MMX.
If you don't mind them being 686+ you could use the fcomi instruction rather than fcom/fstsw/sahf. I guess you could have the macro detect the processor and use the fcomi instruction if the processor mode allowed it to cover all bases.
And you can eliminate the jz instruction from several of the macros by using either jae or jbe, although I can see you probably did it the way you did for consistency rather than speed.
Mirno
isapproxequal using fabs:
; isapproxequal(r1, r2, tolerance) - macro function answering
; "is |r2 - r1| <= tolerance ?"
;   The absolute difference is stored in a per-expansion REAL10 temporary
;   and the final verdict is delegated to islessequal, so the result is
;   whatever islessequal yields for (difference, tolerance).
;   Effects: FINIT resets the FPU, so the caller's FPU stack is discarded.
isapproxequal MACRO r1:req, r2:req, tolerance:req
    LOCAL absdiff
    .data?
    absdiff REAL10 ?
    .code
    finit
    fld     r2
    fld     r1                  ;; st(0) = r1, st(1) = r2
    fsubp   st(1), st(0)        ;; explicit form of no-operand FSUB: st(0) = r2 - r1
    fabs                        ;; operand order no longer matters
    fstp    absdiff
    fwait
    EXITM <islessequal(absdiff, tolerance)>
ENDM
Mirno,
Yeah, I thought about using fcomi but decided to go with fcom. There's not that much difference between the two. I'll eliminate the jz instructions. Thanks.
A side effect of using fabs? This started happening when I switched to using fabs.
This is coming up false, was true, there is a rounding error now.
isapproxequal(FP4(4.99), FP4(5.00), FP4(0.01))
This is coming up false, was true, I can't see why.
isapproxequal(FP10(4.99), FP10(5.00), FP10(0.01))
this comes up true, was true.
isapproxequal(FP8(4.99), FP8(5.00), FP8(0.01))
What's going on here? If I switch back to the old macro it works as I would expect.
Floating point isn't exact; you may find that 4.99 is actually stored as, say, 4.989999. You are testing right on the limit, so the inherent inaccuracy of floating-point values may show up.
Mirno
Mirno,
Yep, it's due to rounding errors. I just can't see why the rounding errors changed when using the fabs version; the numbers are the same. Oh well, time to move on.