News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

LEA

Started by bomz, July 04, 2011, 10:00:07 PM

Previous topic - Next topic

bomz

try

lea     edx,[edx+esi+2]         ; edx = edx + esi + 2 in one instruction; lea leaves the flags untouched

bomz





not use eax in lea

bomz


ERNST

That's interesting.

Q6600:
Quote: Core Duo (2006+), MMX, SSE3
1033 1033 1034 1032 1033
1030 1032 1030 1032 1031
1031 1031 1032 1028 1032

bomz

Code must consider old processors first.

dedndave

not everyone will agree with you on that   :P
there are good and bad things, either way

bomz

mov<movzx  ~2

qWord

Quote from: bomz on July 08, 2011, 12:35:17 PM
mov<movzx  ~2
apples <> oranges
Using movzx avoids problems with partial register accesses.
You will find this information in both Intel's and AMD's optimization manuals.
FPU in a trice: SmplMath
It's that simple!

bomz

xor ecx, ecx                ; clear all 32 bits of ecx once, before the loop label
@@:
mov cl, byte ptr [edx]      ; load one byte into the low 8 bits of ecx; the upper 24 bits are not written

This is quick even if the string is only 1 character long.

hutch--

bomz,

On most recent processors MOVZX is faster than XOR / MOV CL, [EDX]. You need to go back to a PIII to see MOVZX slower.
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

bomz

I would go back, but I have a P4.

The xor is done only once; the mov runs on every iteration.

MichaelW

In my tests on my P3 movzx is ~1.5x faster.

;==============================================================================
    include \masm32\include\masm32rt.inc
    .686
    include \masm32\macros\timers.asm
;==============================================================================
; printf( format [, args...] )
; Macro function that expands to an invoke of crt_printf, passing the format
; string through cfm$() and forwarding any extra arguments unchanged.
; It returns an empty string (EXITM <>), so it can be written on a line by
; itself in call syntax: printf( "...", eax )
printf MACRO format:REQ, args:VARARG
    IFB <args>
        ; No extra arguments were supplied: pass the format string alone.
        invoke crt_printf, cfm$(format)
    ELSE
        ; Forward every extra argument after the format string.
        invoke crt_printf, cfm$(format), args
    ENDIF
    EXITM <>
ENDM
;==============================================================================
    .data
        ; 100 zero-filled bytes; the load source for the timed loops below.
        x db 100 dup(0)
    .code
;==============================================================================
; Benchmark: compares "zero the register once, then mov into its low byte"
; against "movzx" for byte loads, and prints a cycle figure for each variant.
; counter_begin / counter_end come from timers.asm (included above, not shown).
; NOTE(review): 1000 is presumably the repetition count, and eax presumably
; holds the measured cycle count after counter_end (it is printed with
; "%d cycles" each time) -- confirm both against timers.asm.
start:
;==============================================================================

    ; esi = base address of x for every timed load below.
    mov esi, OFFSET x

    ; Pause for 3000 ms before any timing starts.
    ; NOTE(review): presumably lets start-up activity settle -- confirm intent.
    invoke Sleep, 3000

    ; REPEAT assembles the three measurements below three times in a row.
    REPEAT 3

        ; Baseline: an empty countdown, edi = 16 down to 0 (16 iterations).
        ; It is printed on its own line and is NOT subtracted from the two
        ; results that follow.
        counter_begin 1000, HIGH_PRIORITY_CLASS
            mov edi, 16
          @@:
            sub edi, 1
            jnz @B
        counter_end
        printf( "%d cycles, loop only\n", eax )

        ; Variant 1: zero eax/ebx/ecx/edx once, outside the inner loop, then
        ; load bytes into al/bl/cl/dl on every iteration. The byte moves write
        ; only the low 8 bits, so the earlier xor is what keeps the upper bits
        ; of each register zero.
        ; edi runs 16..1, so the four loads touch x[1] through x[19].
        counter_begin 1000, HIGH_PRIORITY_CLASS
            mov edi, 16
            xor eax, eax
            xor ebx, ebx
            xor ecx, ecx
            xor edx, edx
          @@:
            mov al, BYTE PTR [esi+edi]
            mov bl, BYTE PTR [esi+edi+1]
            mov cl, BYTE PTR [esi+edi+2]
            mov dl, BYTE PTR [esi+edi+3]
            sub edi, 1
            jnz @B
        counter_end
        printf( "%d cycles, xor + mov byte ptr\n", eax )

        ; Variant 2: the same four byte loads from the same addresses, but
        ; movzx writes the full 32-bit register, so no separate zeroing is
        ; needed.
        counter_begin 1000, HIGH_PRIORITY_CLASS
            mov edi, 16
          @@:
            movzx eax, BYTE PTR [esi+edi]
            movzx ebx, BYTE PTR [esi+edi+1]
            movzx ecx, BYTE PTR [esi+edi+2]
            movzx edx, BYTE PTR [esi+edi+3]
            sub edi, 1
            jnz @B
        counter_end
        printf( "%d cycles, movzx\n\n", eax )

    ENDM

    ; NOTE(review): inkey and exit are not defined in this file; presumably
    ; they wait for a key press and then end the process -- confirm in the
    ; MASM32 macro sources.
    inkey "Press any key to exit..."
    exit
;==============================================================================
end start


37 cycles, loop only
85 cycles, xor + mov byte ptr
68 cycles, movzx

37 cycles, loop only
85 cycles, xor + mov byte ptr
68 cycles, movzx

37 cycles, loop only
85 cycles, xor + mov byte ptr
68 cycles, movzx

eschew obfuscation

bomz

The xor is done only once; the mov runs on every iteration.

MichaelW

OK, so movzx is still ~1.5x faster.
eschew obfuscation

bomz