LEA

bomz · July 07, 2011, 03:12:23 PM

try

lea edx,[edx+esi+2]

bomz · July 07, 2011, 03:17:31 PM

Don't use eax in the lea.

bomz · July 07, 2011, 03:20:47 PM

*

ERNST · July 07, 2011, 04:15:07 PM

That's interesting.

Q6600:

Quote: Core Duo (2006+), MMX, SSE3
1033 1033 1034 1032 1033
1030 1032 1030 1032 1031
1031 1031 1032 1028 1032

bomz · July 07, 2011, 04:19:56 PM

Code must consider old processors first.

dedndave · July 08, 2011, 02:09:04 AM

not everyone will agree with you on that :P
there are good and bad things, either way

bomz · July 08, 2011, 12:35:17 PM

mov<movzx ~2

qWord · July 08, 2011, 01:04:50 PM

Quote from: bomz on July 08, 2011, 12:35:17 PM
mov<movzx ~2

apples <> oranges
Using movzx avoids problems with partial register accesses.
You will find this information in both Intel's and AMD's optimization manuals.

bomz · July 08, 2011, 03:11:02 PM

xor ecx, ecx
@@:
mov cl, byte ptr [edx]
This is quick even if the string is only 1 character long.

hutch-- · July 08, 2011, 03:35:15 PM

bomz,

On most recent processors MOVZX is faster than XOR / MOV CL, [EDX]. You need to go back to a PIII to see MOVZX slower.

bomz · July 08, 2011, 03:38:28 PM

I would go back, but I have a P4.

The xor is done once; the mov runs on each iteration.

MichaelW · July 08, 2011, 04:52:38 PM

In my tests on my P3 movzx is ~1.5x faster.

Code Select


;==============================================================================
    include \masm32\include\masm32rt.inc
    .686
    include \masm32\macros\timers.asm
;==============================================================================
; printf( format [, args...] )
; Thin wrapper that forwards to crt_printf. The format literal is passed
; through cfm$ first (presumably so that escapes such as \n are honoured --
; confirm against the MASM32 macro definitions). EXITM <> makes this a macro
; function returning empty text, which is why callers write printf( ... )
; with parentheses.
printf MACRO format:REQ, args:VARARG
    IFB <args>
        ;; no extra arguments: the format string is the only parameter
        invoke crt_printf, cfm$(format)
    ELSE
        ;; forward every extra argument unchanged
        invoke crt_printf, cfm$(format), args
    ENDIF
    EXITM <>
ENDM
;==============================================================================
; Benchmark: byte loads via "xor reg,reg once + mov r8,[mem]" versus
; "movzx r32,byte ptr [mem]", compared against an empty loop as baseline.
; Each measurement prints the value left in eax after counter_end as "cycles".
;==============================================================================
    .data
        ; 100-byte zero-filled buffer used as the load source.
        x db 100 dup(0)
    .code
;==============================================================================
start:
;==============================================================================

    ; esi = base address of x for every load below.
    mov esi, OFFSET x

    ; Pause 3000 ms before timing (presumably to let the system settle
    ; after program launch -- confirm intent).
    invoke Sleep, 3000

    ; Assemble the three measurements three times so the output shows
    ; whether the results repeat.
    REPEAT 3

        ; counter_begin/counter_end come from timers.asm; the arguments are
        ; presumably a repetition count and the process priority class used
        ; while timing -- see timers.asm for the exact contract.
        ; --- Test 1: loop overhead only ---------------------------------
        counter_begin 1000, HIGH_PRIORITY_CLASS
            mov edi, 16             ; edi counts 16 down to 1: 16 iterations
          @@:
            sub edi, 1
            jnz @B
        counter_end
        printf( "%d cycles, loop only\n", eax )

        ; --- Test 2: zero the registers once, then 8-bit loads ----------
        counter_begin 1000, HIGH_PRIORITY_CLASS
            mov edi, 16
            ; The upper 24 bits are cleared a single time outside the loop;
            ; inside the loop only the low byte of each register is written.
            xor eax, eax
            xor ebx, ebx
            xor ecx, ecx
            xor edx, edx
          @@:
            ; With edi in 16..1 these read x+1 .. x+19, inside the buffer.
            mov al, BYTE PTR [esi+edi]
            mov bl, BYTE PTR [esi+edi+1]
            mov cl, BYTE PTR [esi+edi+2]
            mov dl, BYTE PTR [esi+edi+3]
            sub edi, 1
            jnz @B
        counter_end
        printf( "%d cycles, xor + mov byte ptr\n", eax )

        ; --- Test 3: zero-extending loads -------------------------------
        counter_begin 1000, HIGH_PRIORITY_CLASS
            mov edi, 16
          @@:
            ; Same addresses as test 2, but each load writes the full
            ; 32-bit register, so no separate zeroing is needed.
            movzx eax, BYTE PTR [esi+edi]
            movzx ebx, BYTE PTR [esi+edi+1]
            movzx ecx, BYTE PTR [esi+edi+2]
            movzx edx, BYTE PTR [esi+edi+3]
            sub edi, 1
            jnz @B
        counter_end
        printf( "%d cycles, movzx\n\n", eax )

    ENDM

    ; Keep the console window open until a key is pressed, then terminate.
    inkey "Press any key to exit..."
    exit
;==============================================================================
end start

Code Select


37 cycles, loop only
85 cycles, xor + mov byte ptr
68 cycles, movzx

37 cycles, loop only
85 cycles, xor + mov byte ptr
68 cycles, movzx

37 cycles, loop only
85 cycles, xor + mov byte ptr
68 cycles, movzx

bomz · July 08, 2011, 05:29:39 PM

The xor is done once; the mov runs on each iteration.

MichaelW · July 08, 2011, 05:38:43 PM

OK, so movzx is still ~1.5x faster.

bomz · July 08, 2011, 05:47:51 PM

mov

News:

LEA