News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Floating-point comparison macros

Started by GregL, July 21, 2005, 01:31:39 AM

Previous topic - Next topic

GregL

I needed some floating-point comparison routines, so I wrote these macros. They work with REAL4, REAL8 and REAL10 memory variables. They seem to be working correctly. They return 1 if true, 0 if false and -1 on error.

; Floating-point comparison macros
; Greg Lyon - 2005
;
; Thanks to Raymond Filiatreault for
; the excellent tutorial 'Simply FPU'.
; It made writing these much easier.
;
.586
.MODEL FLAT, stdcall
option casemap:none

include windows.inc

include kernel32.inc
include user32.inc
include masm32.inc

include c:\masm32\macros\macros.asm

includelib kernel32.lib
includelib user32.lib
includelib masm32.lib

;------------------------------------------------------------
;------------------------------------------------------------
; isgreater(r1, r2) - macro function; expands in place and yields <eax>:
;    1  r1 >  r2
;    0  r1 <= r2
;   -1  operands are unordered (FCOM reported C2, e.g. a NaN operand)
; r1 and r2 are memory operands that FLD accepts.
; Side effects: FINIT re-initialises the FPU; EAX and EFLAGS are overwritten.
isgreater MACRO r1:req, r2:req
    LOCAL unordered, done
    finit
    fld     r2                  ;; ST(0) = r2
    fld     r1                  ;; ST(0) = r1, ST(1) = r2
    fcompp                      ;; compare r1 with r2, pop both operands
    fstsw   ax                  ;; FPU status word -> AX
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    jp      unordered           ;; C2 set: comparison was not possible
    seta    al                  ;; CF = 0 and ZF = 0  <=>  r1 > r2
    movzx   eax, al
    jmp     done
  unordered:
    mov     eax, -1
  done:
    EXITM <eax>
ENDM
;------------------------------------------------------------
; isgreaterequal(r1, r2) - macro function; expands in place and yields <eax>:
;    1  r1 >= r2
;    0  r1 <  r2
;   -1  operands are unordered (FCOM reported C2, e.g. a NaN operand)
; r1 and r2 are memory operands that FLD accepts.
; Side effects: FINIT re-initialises the FPU; EAX and EFLAGS are overwritten.
isgreaterequal MACRO r1:req, r2:req
    LOCAL unordered, done
    finit
    fld     r2                  ;; ST(0) = r2
    fld     r1                  ;; ST(0) = r1, ST(1) = r2
    fcompp                      ;; compare r1 with r2, pop both operands
    fstsw   ax                  ;; FPU status word -> AX
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    jp      unordered           ;; C2 set: comparison was not possible
    setae   al                  ;; CF = 0  <=>  r1 > r2 or r1 = r2
    movzx   eax, al
    jmp     done
  unordered:
    mov     eax, -1
  done:
    EXITM <eax>
ENDM
;;------------------------------------------------------------
; isless(r1, r2) - macro function; expands in place and yields <eax>:
;    1  r1 <  r2
;    0  r1 >= r2
;   -1  operands are unordered (FCOM reported C2, e.g. a NaN operand)
; r1 and r2 are memory operands that FLD accepts.
; Side effects: FINIT re-initialises the FPU; EAX and EFLAGS are overwritten.
isless MACRO r1:req, r2:req
    LOCAL unordered, done
    finit
    fld     r2                  ;; ST(0) = r2
    fld     r1                  ;; ST(0) = r1, ST(1) = r2
    fcompp                      ;; compare r1 with r2, pop both operands
    fstsw   ax                  ;; FPU status word -> AX
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    jp      unordered           ;; C2 set: comparison was not possible
    setb    al                  ;; CF = 1  <=>  r1 < r2
    movzx   eax, al
    jmp     done
  unordered:
    mov     eax, -1
  done:
    EXITM <eax>
ENDM
;;------------------------------------------------------------
; islessequal(r1, r2) - macro function; expands in place and yields <eax>:
;    1  r1 <= r2
;    0  r1 >  r2
;   -1  operands are unordered (FCOM reported C2, e.g. a NaN operand)
; r1 and r2 are memory operands that FLD accepts.
; Side effects: FINIT re-initialises the FPU; EAX and EFLAGS are overwritten.
islessequal MACRO r1:req, r2:req
    LOCAL unordered, done
    finit
    fld     r2                  ;; ST(0) = r2
    fld     r1                  ;; ST(0) = r1, ST(1) = r2
    fcompp                      ;; compare r1 with r2, pop both operands
    fstsw   ax                  ;; FPU status word -> AX
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    jp      unordered           ;; C2 set: comparison was not possible
    setbe   al                  ;; CF = 1 or ZF = 1  <=>  r1 < r2 or r1 = r2
    movzx   eax, al
    jmp     done
  unordered:
    mov     eax, -1
  done:
    EXITM <eax>
ENDM
;;------------------------------------------------------------
; islessgreater(r1, r2) - macro function; expands in place and yields <eax>:
;    1  r1 < r2 or r1 > r2
;    0  r1 = r2
;   -1  operands are unordered (FCOM reported C2, e.g. a NaN operand)
; r1 and r2 are memory operands that FLD accepts.
; Side effects: FINIT re-initialises the FPU; EAX and EFLAGS are overwritten.
islessgreater MACRO r1:req, r2:req
    LOCAL unordered, done
    finit
    fld     r2                  ;; ST(0) = r2
    fld     r1                  ;; ST(0) = r1, ST(1) = r2
    fcompp                      ;; compare r1 with r2, pop both operands
    fstsw   ax                  ;; FPU status word -> AX
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    jp      unordered           ;; C2 set: comparison was not possible
    setne   al                  ;; ZF = 0  <=>  r1 differs from r2
    movzx   eax, al
    jmp     done
  unordered:
    mov     eax, -1
  done:
    EXITM <eax>
ENDM
;;------------------------------------------------------------
; isequal(r1, r2) - macro function; expands in place and yields <eax>:
;    1  r1 = r2
;    0  r1 < r2 or r1 > r2
;   -1  operands are unordered (FCOM reported C2, e.g. a NaN operand)
; r1 and r2 are memory operands that FLD accepts.
; Side effects: FINIT re-initialises the FPU; EAX and EFLAGS are overwritten.
isequal MACRO r1:req, r2:req
    LOCAL unordered, done
    finit
    fld     r2                  ;; ST(0) = r2
    fld     r1                  ;; ST(0) = r1, ST(1) = r2
    fcompp                      ;; compare r1 with r2, pop both operands
    fstsw   ax                  ;; FPU status word -> AX
    sahf                        ;; C0 -> CF, C2 -> PF, C3 -> ZF
    jp      unordered           ;; C2 set: comparison was not possible
    sete    al                  ;; ZF = 1  <=>  r1 = r2
    movzx   eax, al
    jmp     done
  unordered:
    mov     eax, -1
  done:
    EXITM <eax>
ENDM
;------------------------------------------------------------
; isnotequal(r1, r2) - alias for islessgreater(r1, r2).
; The macro function expands to that call, so the value it yields is
; whatever islessgreater yields for the same two operands.
isnotequal MACRO r1:req, r2:req
    EXITM <islessgreater(r1, r2)>
ENDM
;------------------------------------------------------------
; isapproxequal(r1, r2, tolerance) - macro function; yields the result of
; islessequal(|r1 - r2|, tolerance):
;    1  |r1 - r2| <= tolerance
;    0  |r1 - r2| >  tolerance
;   -1  the comparison is unordered (e.g. a NaN operand)
; r1, r2 and tolerance are memory operands that FLD accepts.
; Each expansion reserves one REAL10 in .data? for the difference and
; leaves the assembler in the .code segment.
; Side effects: FINIT re-initialises the FPU.
;
; Fix: the earlier body ordered the operands with isgreater and then used
; FSUBP ST(1), ST(0), which computes ST(1) - ST(0). In both branches that
; was "smaller - larger", so the stored difference was never positive and
; every ordered pair passed for any non-negative tolerance. The magnitude
; is now taken with FABS, which also makes the operand order irrelevant.
;
; The test is exact: when |r1 - r2| sits right at the tolerance, the
; binary rounding of the operands decides the outcome.
isapproxequal MACRO r1:req, r2:req, tolerance:req
    LOCAL diff
    .data?
        diff REAL10 ?
    .code
    finit
    fld     r2                  ;; ST(0) = r2
    fld     r1                  ;; ST(0) = r1, ST(1) = r2
    fsubp   st(1), st(0)        ;; ST(0) = r2 - r1
    fabs                        ;; ST(0) = |r1 - r2|
    fstp    diff                ;; store the magnitude, FPU stack empty again
    fwait
    EXITM <islessequal(diff, tolerance)>
ENDM
;------------------------------------------------------------

;------------------------------------------------------------
; Demo driver: runs each comparison macro on REAL4, REAL8 and
; REAL10 literals and prints "True", "False" or "Error".
;------------------------------------------------------------

; ShowResult - print the text for the tri-state value in EAX:
;   -1 -> szError, any other non-zero value -> szTrue, 0 -> szFalse
ShowResult MACRO
    .if SDWORD PTR eax == -1
        print ADDR szError
    .elseif eax
        print ADDR szTrue
    .else
        print ADDR szFalse
    .endif
ENDM

.DATA

    szTrue    BYTE    "True ", 10, 0
    szFalse   BYTE    "False", 10, 0
    szError   BYTE    "Error", 10, 0

.CODE

start:

    ; --- isgreater ---------------------------------------------------
    print SADD("isgreater(FP4(5.00), FP4(4.99))                   = ")
    mov eax,    isgreater(FP4(5.00), FP4(4.99))
    ShowResult

    print SADD("isgreater(FP8(5.00), FP8(4.99))                   = ")
    mov eax,    isgreater(FP8(5.00), FP8(4.99))
    ShowResult

    print SADD("isgreater(FP10(5.00), FP10(4.99))                 = ")
    mov eax,    isgreater(FP10(5.00), FP10(4.99))
    ShowResult

    print SADD(10)

    ; --- isless ------------------------------------------------------
    print SADD("isless(FP4(5.00), FP4(4.99))                      = ")
    mov eax,    isless(FP4(5.00), FP4(4.99))
    ShowResult

    print SADD("isless(FP8(5.00), FP8(4.99))                      = ")
    mov eax,    isless(FP8(5.00), FP8(4.99))
    ShowResult

    print SADD("isless(FP10(5.00), FP10(4.99))                    = ")
    mov eax,    isless(FP10(5.00), FP10(4.99))
    ShowResult

    print SADD(10)

    ; --- isequal -----------------------------------------------------
    print SADD("isequal(FP4(5.00), FP4(4.99))                     = ")
    mov eax,    isequal(FP4(5.00), FP4(4.99))
    ShowResult

    print SADD("isequal(FP8(5.00), FP8(4.99))                     = ")
    mov eax,    isequal(FP8(5.00), FP8(4.99))
    ShowResult

    print SADD("isequal(FP10(5.00), FP10(4.99))                   = ")
    mov eax,    isequal(FP10(5.00), FP10(4.99))
    ShowResult

    print SADD(10)

    ; --- isnotequal --------------------------------------------------
    print SADD("isnotequal(FP4(5.00), FP4(4.99))                  = ")
    mov eax,    isnotequal(FP4(5.00), FP4(4.99))
    ShowResult

    print SADD("isnotequal(FP8(5.00), FP8(4.99))                  = ")
    mov eax,    isnotequal(FP8(5.00), FP8(4.99))
    ShowResult

    print SADD("isnotequal(FP10(5.00), FP10(4.99))                = ")
    mov eax,    isnotequal(FP10(5.00), FP10(4.99))
    ShowResult

    print SADD(10)

    ; --- isapproxequal, smaller operand first ------------------------
    print SADD("isapproxequal(FP4(4.99), FP4(5.00), FP4(0.01))    = ")
    mov eax,    isapproxequal(FP4(4.99), FP4(5.00), FP4(0.01))
    ShowResult

    print SADD("isapproxequal(FP8(4.99), FP8(5.00), FP8(0.01))    = ")
    mov eax,    isapproxequal(FP8(4.99), FP8(5.00), FP8(0.01))
    ShowResult

    print SADD("isapproxequal(FP10(4.99), FP10(5.00), FP10(0.01)) = ")
    mov eax,    isapproxequal(FP10(4.99), FP10(5.00), FP10(0.01))
    ShowResult

    print SADD(10)

    ; --- isapproxequal, larger operand first -------------------------
    print SADD("isapproxequal(FP4(5.00), FP4(4.99), FP4(0.01))    = ")
    mov eax,    isapproxequal(FP4(5.00), FP4(4.99), FP4(0.01))
    ShowResult

    print SADD("isapproxequal(FP8(5.00), FP8(4.99), FP8(0.01))    = ")
    mov eax,    isapproxequal(FP8(5.00), FP8(4.99), FP8(0.01))
    ShowResult

    print SADD("isapproxequal(FP10(5.00), FP10(4.99), FP10(0.01)) = ")
    mov eax,    isapproxequal(FP10(5.00), FP10(4.99), FP10(0.01))
    ShowResult

    print SADD(10)

    invoke ExitProcess, 0

end start


Codewarp

I don't quite understand this fascination with macros.  These are perfectly good routines, so why not make them callable, then call them?  The cycle savings are marginal, given the blow-up in code size and its unnecessary effect on the L1 code cache when a decent-sized application gets a hold of these macros.

Also, with all those finits, fwaits, and sahf's, the code is slow.  Floating point comparisons can often be performed faster using integer instructions instead.  For example, here is a routine to return greater=1, equal=0, less=-1:


__declspec( naked ) Integer fpcmp (double fpv, double fpw) {
    _asm {
         mov     ecx, [esp+8]           ; load left upper mantissa and exponent into ecx
         mov     edx, [esp+16]          ; load right upper mantissa and exponent into edx
         xor     ecx, 80000000h         ; flip signs
         xor     edx, 80000000h
         cmp     ecx, edx
         je      topeq                  ; branch if equal
noteq:   sbb     eax, eax               ; eax = 0 for greater, -1 for less
         add     eax, eax               ; eax = 0, -2
         add     eax, 1                 ; return 1=greater, -1=less
         ret
topeq:   mov     eax, [esp + 4]
         sub     eax, [esp + 12]
         jne     noteq
         ret
    }
}


Finally, the handwriting is on the wall--the FPU's days are numbered.  Future cpus will have only SSE, and no more MMX or FPU.  Get used to writing code that doesn't rely on NaN's and Infinities, as the standard approach by numerical software is to prevent such situations from occurring in the first place.  Intel FPU architecture is antiquated and slow, so don't fall too in love with it...

raymond

Nice exercise Greg.

Generally, a float needs to be compared immediately after it has been computed. It would thus be more efficient to use its value while it is already on the FPU instead of storing it in some memory variable and then bring it back on the FPU for comparison.

Some of your code could also be simplified. For example, in the isapproxequal code, you don't care which base float is greater but you're only concerned if the absolute difference between them exceeds a given value. Thus, disregard comparing the two base floats; simply subtract one from the other and use the fabs instruction.

Raymond
When you assume something, you risk being wrong half the time
http://www.ray.masmcode.com

GregL

Raymond,

Thanks for the suggestions, I wasn't really concerned about speed with these. I just wanted something that worked. After I wrote procedure versions of them, I converted them to macros. I modeled them after C99 macros that have the same names, the same arguments and the same return values. I looked at your FpuComp procedure, but I wanted something that would stand on its own. I am going to take your suggestion and make the change to use fabs.  :thumbu


GregL

#4
Codewarp,

Hey hotshot. I do have a fascination with macros. I did write procedure versions of these. And I wasn't really concerned about speed.

If you use procedures you need a separate set of procedures for each data type. These macros will accept all three types.

I am well aware of the future direction of floating-point but I think you are being a bit premature about not using FPU and MMX.



Mirno

If you don't mind them being 686+ you could use the fcomi instruction rather than fcom/fstsw/sahf. I guess you could have the macro detect the processor and use the fcomi instruction if the processor mode allowed it to cover all bases.

And you can eliminate the jz instruction from several of the macros by using either jae or jbe. Although I can see you probably did it the way you did for consistency rather than speed.

Mirno

GregL

isapproxequal using fabs:

; isapproxequal(r1, r2, tolerance) - macro function; yields the result of
; islessequal(|r1 - r2|, tolerance).
; r1, r2 and tolerance are memory operands that FLD accepts.
; Each expansion reserves one REAL10 in .data? for the magnitude and
; leaves the assembler in the .code segment.
; Side effects: FINIT re-initialises the FPU.
isapproxequal MACRO r1:req, r2:req, tolerance:req
    LOCAL absdiff
    .data?
        absdiff REAL10 ?
    .code
    finit
    fld     r2                  ;; ST(0) = r2
    fld     r1                  ;; ST(0) = r1, ST(1) = r2
    fsubp   st(1), st(0)        ;; ST(0) = r2 - r1 (explicit form of operand-less FSUB)
    fabs                        ;; ST(0) = |r1 - r2|
    fstp    absdiff             ;; store the magnitude, FPU stack empty again
    fwait
    EXITM <islessequal(absdiff, tolerance)>
ENDM


GregL

Mirno,

Yeah, I thought about using fcomi but decided to go with fcom. There's not that much difference between the two. I'll eliminate the jz instructions. Thanks.


GregL

A side effect of using fabs? This started happening when I switched to using fabs.

This is coming up false, was true, there is a rounding error now.

isapproxequal(FP4(4.99), FP4(5.00), FP4(0.01))


This is coming up false, was true, I can't see why.

isapproxequal(FP10(4.99), FP10(5.00), FP10(0.01))


this comes up true, was true.

isapproxequal(FP8(4.99), FP8(5.00), FP8(0.01))


What's going on here? If I switch back to the old macro it works as I would expect.



Mirno

Floating point isn't exact; you may find that 4.99 is stored as, say, 4.989999. You are testing right on the limit, so the inherent inaccuracy of floating-point values may show up.

Mirno

GregL

Mirno,

Yep, it's due to rounding errors. I just can't see why the rounding errors changed when using the fabs version, the numbers are the same. Oh well, time to move on.