want cycle
; Copy the bytes at [esi] to [edi] up to, but not including, the first zero byte.
; On exit esi points at the zero byte and edi at the next unwritten destination byte.
cycle:
mov al,[esi]            ; fetch the next source byte
cmp al,0                ; terminator reached?
jz cycle_end            ; yes: leave before the store, so the 0 is never written
mov [edi],al            ; store the byte
inc esi                 ; advance source
inc edi                 ; advance destination
jmp cycle
cycle_end:
How can I best optimize this loop? (The terminating 0 must not be stored!)
I want to use .while/.repeat/.until, but MASM always adds an extra jump.
In my opinion that is the minimum number of instructions one can use.
As for timing, using "add" instead of "inc" will be faster (this applies to the P4):
; Byte-copy loop from [esi] to [edi] that stops at the first zero byte without
; storing it; the pointers are advanced with "add reg,1" rather than "inc".
cycle:
mov al,[esi]            ; fetch the next source byte
cmp al,0                ; terminator reached?
jz cycle_end            ; yes: exit without storing it
mov [edi],al            ; store the byte
add esi,1               ; advance source
add edi,1               ; advance destination
jmp cycle
cycle_end:
OK, but I want this code in high-level syntax:
; High-level (.repeat/.until) form of the byte-copy loop: copies bytes from
; [esi] to [edi] until a zero byte is read; the zero itself is not stored.
.repeat
mov al,[esi]            ; fetch the next source byte
.if al==0
.break                  ; terminator: leave the loop before the store
.endif
mov [edi],al            ; store the byte
inc esi                 ; advance source
inc edi                 ; advance destination
.until 0                ; condition is never true: only .break exits the loop
You can do it with high level syntax but it is harder to know what's under the hood without creating a listing.
What you're looking at... speed or readability ?
Speed...
; Dword-at-a-time copy of the zero-terminated bytes at the source to [edi];
; the terminating zero is not stored.
sub esi, edi            ; esi = source - destination, so [esi+edi] addresses the source
cycle:
mov eax, [esi+edi]      ; load four source bytes at once (may read up to 3 bytes past the terminator)
test eax, 0FFh          ; byte 0 zero?
jz cycle_end            ; yes: nothing left to store
test eax, 0FF00h        ; byte 1 zero?
jz cycle_end_1
test eax, 0FF0000h      ; byte 2 zero?
jz cycle_end_2
test eax, 0FF000000h    ; byte 3 zero?
jz cycle_end_3
mov [edi], eax          ; no zero byte: store all four
add edi, 4              ; advancing edi alone moves both source and destination
jmp cycle
cycle_end_1:
mov [edi], al           ; one byte precedes the terminator
jmp cycle_end
cycle_end_2:
mov [edi], ax           ; two bytes precede the terminator
jmp cycle_end
cycle_end_3:
mov [edi], ax           ; three bytes precede the terminator: low word first...
shr eax, 16
mov [edi+2], al         ; ...then the third byte
cycle_end:
; NOTE: edi is not advanced past the tail bytes stored by the cycle_end_N paths.
Readability...
; Byte-copy loop in high-level syntax using a single moving pointer;
; the terminating zero is not stored.
sub esi, edi            ; esi = source - destination, so [esi+edi] addresses the source
.repeat
mov al,[esi+edi]        ; fetch the next source byte
.if !(al & al)          ; true when al is zero
.break                  ; terminator: leave before the store
.endif
mov [edi],al            ; store the byte
add edi, 1              ; one increment advances both source and destination
.until 0                ; condition is never true: only .break exits the loop
Nice :clap:
but MASM still adds an extra jump:
0489 66| 2B F7 sub esi, edi
.repeat
048C *@C0001:
048C 67& 8A 04 37 mov al,[esi+edi]
.if !(al & al)
0490 84 C0 * test al, al
0492 75 02 * jne @C0002 <------
.break
0494 EB 09 * jmp @C0005 <------
.endif
0496 *@C0002:
0496 67& 88 07 mov [edi],al
0499 66| 83 C7 01 add edi, 1
.until 0
049D EB ED * jmp @C0001
049F *@C0005:
untested, but should be quite fast (when esi is dword aligned):
; Copy zero-terminated bytes from [esi] to [edi] four at a time, using a bit
; trick to detect a zero byte inside a dword; the terminator is not stored.
; NOTE(review): ebx and ebp are overwritten here - confirm the surrounding code saves them.
mov ecx, 01010101h      ; constant subtracted from every byte
mov edx, 80808080h      ; mask selecting the top bit of every byte
@@cycle4:
mov eax, [esi]          ; x = next four source bytes
mov ebx, eax
mov ebp, eax            ; keep an unmodified copy of x for the store
not eax                 ; eax = ~x
sub ebx, ecx            ; ebx = x - 01010101h
and eax, edx            ; eax = ~x & 80808080h
and eax, ebx            ; eax = (x - 01010101h) & ~x & 80808080h
jnz @@almostdone        ; nonzero: x contains a zero byte, finish byte by byte
mov [edi], ebp          ; no zero byte: store all four
add esi, 4
add edi, 4
jmp @@cycle4
@@almostdone:
mov al, [esi]           ; tail: copy single bytes up to the terminator
test al, al
jz @@done               ; terminator reached: do not store it
mov [edi], al
add esi, 1
add edi, 1
jmp @@almostdone
@@done:
the basic idea is that ((x - 0x01010101) & ~x & 0x80808080) returns nonzero if and only if x contains a zero byte
Here are a couple of choices.
=================================
same logic
=================================
; Byte-copy loop from [esi] to [edi] with the pointer increments at the top of
; the loop; the terminating zero is not stored.
sub esi, 1              ; pre-decrement so the first increment lands on the first byte
sub edi, 1
cycle:
add esi, 1              ; advance source
add edi, 1              ; advance destination
movzx eax, BYTE PTR [esi]   ; zero-extended load of the next source byte
test eax, eax           ; terminator reached?
jz cycle_end            ; yes: exit without storing it
mov [edi],al            ; store the byte
jmp cycle
cycle_end:
=================================
extra write to EDI
=================================
; Byte-copy loop from [esi] to [edi] with a single branch per byte. The store
; happens before the test, so the terminating zero IS written to the destination.
sub esi, 1              ; pre-decrement so the first increment lands on the first byte
sub edi, 1
cycle:
add esi, 1              ; advance source
add edi, 1              ; advance destination
movzx eax, BYTE PTR [esi]   ; zero-extended load of the next source byte
mov [edi],al            ; store it unconditionally (including the final zero)
test eax, eax
jnz cycle               ; loop until the zero byte has been copied
Quote from: korte on January 08, 2008, 05:48:07 PM
; High-level (.repeat/.until) byte-copy loop: copies bytes from [esi] to [edi]
; until a zero byte is read; the zero itself is not stored.
.repeat
mov al,[esi]            ; fetch the next source byte
.if al==0
.break                  ; terminator: leave the loop before the store
.endif
mov [edi],al            ; store the byte
inc esi                 ; advance source
inc edi                 ; advance destination
.until 0                ; condition is never true: only .break exits the loop
You want to use the high level syntax and avoid the extra jump !!!
There it is...
; Byte-copy loop in high-level syntax; ".break .if" folds the exit test into a
; single conditional jump. The terminating zero is not stored.
sub esi, edi            ; esi = source - destination, so [esi+edi] addresses the source
.while 1
mov al, [esi+edi]       ; fetch the next source byte
.break .if !(al & al)   ; leave the loop when al is zero, before the store
mov [edi], al           ; store the byte
add edi, 1              ; one increment advances both source and destination
.endw
:00401000 2BF7 sub esi, edi
:00401002 8A0437 mov al, byte ptr [esi+edi]
:00401005 84C0 test al, al
:00401007 7407 jz 00401010
:00401009 8807 mov byte ptr [edi], al
:0040100B 83C701 add edi, 00000001
:0040100E EBF2 jmp 00401002
:00401010
:8)
Quote from: jdoe on January 09, 2008, 12:51:46 AM
sub esi, edi
it seems there are people who learn things quickly :lol
a mix hutch/jdoe to remove the jmp instruction and 1 inc of the loop :
; Copy zero-terminated bytes from the source to [edi] without storing the
; terminator. The loop is entered at the load/test, so the conditional jump at
; the bottom is the only branch executed per byte.
sub esi,edi             ; esi = source - destination, so [esi+edi] addresses the source
jmp cycle2              ; enter at the load so nothing is stored before the first test
cycle:
mov [edi],al            ; store the byte fetched at cycle2
inc edi                 ; one increment advances both source and destination
cycle2:
mov al,[esi+edi]        ; fetch the next source byte
test al,al
jnz cycle               ; non-zero: store it and continue
cycle_end: ; not needed
; Copy zero-terminated bytes from the source to [edi] without storing the
; terminator. The loop is entered at the load/test, so the conditional jump at
; the bottom is the only branch executed per byte.
sub esi,edi             ; esi = source - destination, so [esi+edi] addresses the source
jmp cycle2              ; enter at the load so nothing is stored before the first test
cycle:
mov [edi],al            ; store the byte fetched at cycle2
inc edi                 ; one increment advances both source and destination
cycle2:
mov al,[esi+edi]        ; fetch the next source byte
test al,al
jnz cycle               ; non-zero: store it and continue
NICE :U :clap:
high level implementation? :bg
Here is a variation.
; Fall-through variant of the byte-copy loop: no initial jmp into the loop.
; NOTE: on the first pass AL has not been loaded by this snippet, so whatever it
; holds is written to the byte just before the original destination address.
sub esi,edi             ; esi = source - destination, so [esi+edi] addresses the source
sub edi, 1              ; back up one byte so the first store/increment pair lines up
align 4
cycle:
mov [edi],al            ; store the previously loaded byte (first pass: see NOTE)
add edi, 1              ; one increment advances both source and destination
movzx eax, BYTE PTR [esi+edi]   ; zero-extended load of the next source byte
test eax, eax
jnz cycle               ; non-zero: loop back and store it; the zero is not stored
Quote from: hutch-- on January 09, 2008, 06:13:57 AM
Here is a variation.
; Fall-through variant of the byte-copy loop: no initial jmp into the loop.
; NOTE: on the first pass AL has not been loaded by this snippet, so whatever it
; holds is written to the byte just before the original destination address.
sub esi,edi             ; esi = source - destination, so [esi+edi] addresses the source
sub edi, 1              ; back up one byte so the first store/increment pair lines up
align 4
cycle:
mov [edi],al            ; store the previously loaded byte (first pass: see NOTE)
add edi, 1              ; one increment advances both source and destination
movzx eax, BYTE PTR [esi+edi]   ; zero-extended load of the next source byte
test eax, eax
jnz cycle               ; non-zero: loop back and store it; the zero is not stored
Hmmm...
Where the first "al" in "mov [edi],al" comes from ::)
-----
Now that topic could turn very funny (or weird) if we use timers.asm from MichaelW and add a timing to all of them :green2
JD,
It probably does not matter, as it just allows the code to fall through without the JMP to start it. That's why the SUB EDI, 1 is there.
Quote from: hutch-- on January 09, 2008, 06:42:56 AM
JD,
It probably does not matter, as it just allows the code to fall through without the JMP to start it. That's why the SUB EDI, 1 is there.
I see... but for that to be a good solution, the byte before [edi] must not be in use.
Quote from: korte on January 09, 2008, 04:19:05 AM
high level implementation? :bg
houla... i can't help you here... i've never used the high level syntax... :red
Quote from: korte on January 08, 2008, 04:59:25 PM
; Copy the bytes at [esi] to [edi] up to, but not including, the first zero byte.
cycle:
mov al,[esi]            ; fetch the next source byte
cmp al,0                ; terminator reached?
jz cycle_end            ; yes: leave before the store, so the 0 is never written
mov [edi],al            ; store the byte
inc esi                 ; advance source
inc edi                 ; advance destination
jmp cycle
cycle_end:
My two bits worth, provided processor is compatible
; Byte-copy loop built on the string instruction MOVSB; the terminating zero is
; tested in memory and never copied.
; NOTE(review): assumes the direction flag is clear so MOVSB increments esi/edi - confirm.
cycle:
cmp byte ptr [esi], 0   ; terminator at the source?
jz cycle_end            ; yes: exit without copying it
movsb                   ; copy one byte from [esi] to [edi] and advance both pointers
jmp cycle
cycle_end:
Quote