News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Bin2Time, Converts binary value to hours:min:sec.000

Started by Tight_Coder_Ex, February 01, 2008, 10:54:48 PM

Previous topic - Next topic

donkey

Quote from: NightWare on February 17, 2008, 02:34:29 AM
donkey,
few instructions removed, if it was a signature then forget this post :
...

Thanks NightWare,

I have to admit that I didn't spend a lot of time on the algorithm, so I didn't do much to optimize or reduce it; I threw it together and tested it. By the way, there is a bug in it if the fraction is less than 100; you have to add the following lines to the fraction section...

// Fraction
; Excerpt from a larger routine that is not shown here.
; Presumably on entry edx = whole seconds and esi = total milliseconds, so the
; subtraction below leaves the millisecond remainder -- TODO confirm against
; the full routine.
mov eax,1000
mul edx ; eax = edx * 1000
sub esi,eax ; esi = esi - edx * 1000
mov eax,esi
mov edx,eax ; default result: the remainder itself
cmp eax,99 ; <<<<<<<<<<<<<<<< Add lines from here
jbe > ; remainder <= 99: skip the divide and keep it unchanged in edx
mov edx, 3435973837 ; 0CCCCCCCDh, reciprocal of 10 scaled by 2^35
mul edx ; edx:eax = remainder * 0CCCCCCCDh
shr edx, 3 ; edx = remainder / 10
: ; <<<<<<<<<<<<<<<<<<<<<<<< to here
; NOTE(review): with the added lines a remainder of 50 and a remainder of 500
; both reach this point as 50 and produce the same two digits -- confirm that
; this is the intended display.

mov eax,edx
aam ; AH = AL / 10 (tens), AL = AL mod 10 (units)
add eax,3030h ; turn both digits into ASCII by adding "0"
bswap eax ; byte reversal plus the shift below swaps the two characters,
shr eax,16 ; leaving the tens character in AL and the units character in AH
mov [szFrac],ax ; stored tens first, then units


Donkey
"Ahhh, what an awful dream. Ones and zeroes everywhere...[shudder] and I thought I saw a two." -- Bender
"It was just a dream, Bender. There's no such thing as two". -- Fry
-- Futurama

Donkey's Stable

lingo

Thanks Michael,
Here is something faster...

[attachment deleted by admin]

MichaelW

Thanks lingo,

I would like to see the results, but apparently the synthesized paddq, pmuludq, and psubq are SSE2 instructions that will not run on my P3, and my P4 is not available.
eschew obfuscation

NightWare

#33
Quote from: MichaelW on February 16, 2008, 02:51:27 AM
Or at least all data except byte-size data that is accessed as bytes, and I failed to do that for yours and lingo's data, and Donkey's data that is accessed as words.
:lol understood later... (i know, it took me some time  :()

edit :
lingo's algo, without aam :
.data
ALIGN 16
; 64-bit constants used by Routine5. Each pair is written low dword first, then high dword.
Mask01 DWORD 000000000h,0FFFF0000h ; bits 63..48 of a qword
Mask03 DWORD 000000000h,0FFFFFFFFh ; high dword
Mask04 DWORD 0FFFFFFFFh,000000000h ; low dword
Mask05 DWORD 0FFFF0000h,0FFFFFFFFh ; bits 63..16
Mask07 DWORD 003FFFFFFh,000000000h ; low 26 bits
PdDiv DWORD 00031B5D4h,000000000h ; 31B5D4h = 3257812, roughly 2^48 / 86400000 (milliseconds per day)
PwMul10 DWORD 0000A000Ah,0000A000Ah ; 10 in each of the four words
PwDiv10 DWORD 0199A199Ah,0199A199Ah ; 199Ah in each word, used as a divide-by-10 multiplier (author's note: try 199Ah through 199Dh)
Mask0Fh DWORD 0000F000Fh,0000F000Fh ; low nibble of each word
MaskF0h DWORD 00F000F00h,00F000F00h ; low nibble of the high byte of each word
Mask30h DWORD 030303030h,030303030h ; ASCII "0" in every byte

.code
ALIGN 16
;
; Routine5 -- convert a millisecond count into ASCII digit pairs for days,
; hours, minutes and seconds.
;
; In:    [esp+4] = time in milliseconds (the routine ends with a plain RET,
;                  so the argument is left on the stack for the caller)
; Out:   two ASCII characters (tens first) stored at each of szDaysL,
;        szHoursL, szMinL and szSecL (labels defined outside this listing)
; Uses:  eax, MM0-MM2; EMMS is executed before returning so that x87 code
;        can run afterwards
; Needs: SSE2 (pmuludq, paddq and psubq on MMX registers)
;
; Method: ms * PdDiv is a fixed-point number with the day count in bits
; 63..48 and the fraction of a day below it. The fraction is then scaled by
; 24, 60 and 60 in turn using shifts, adds and subtracts; each scaling pushes
; the next field (hours, minutes, seconds) into its own 16-bit word.
Routine5 PROC

        mov     eax, [esp+1*4]              ; eax = time in milliseconds
        movd    MM1, eax
        pmuludq MM1, QWORD PTR [PdDiv]      ; MM1 = ms * PdDiv (64-bit product)

        ; --- days ---------------------------------------------------------
        movq    MM0, MM1
        pand    MM0, QWORD PTR [Mask01]     ; MM0 = bits 63..48 (days)
        pxor    MM1, MM0                    ; MM1 = low 48 bits (fraction of a day)

        ; --- hours: fraction * 24 / 2^16 = (fraction >> 13) * 3 -----------
        psrlq   MM1, 13
        movq    MM2, MM1
        paddq   MM1, MM1
        paddq   MM1, MM2                    ; hours now sit in the high dword
        por     MM0, MM1
        pand    MM1, QWORD PTR [Mask04]     ; MM1 = low dword (fraction of an hour)
        pand    MM0, QWORD PTR [Mask03]     ; MM0 = high dword (days word, hours word)

        ; --- minutes: x - x/16, then >> 10 puts minutes at bit 16 ----------
        movq    MM2, MM1
        psrlq   MM2, 4
        psubq   MM1, MM2                    ; MM1 = x - (x >> 4)
        movq    MM2, MM1
        psrlq   MM2, 10
        por     MM0, MM2
        pand    MM0, QWORD PTR [Mask05]     ; keep bits 63..16 (days, hours, minutes)
        pand    MM1, QWORD PTR [Mask07]     ; MM1 = low 26 bits (fraction of a minute)

        ; --- seconds: x - x/16, then >> 20 puts seconds in the low word ----
        movq    MM2, MM1
        psrlq   MM2, 4
        psubq   MM1, MM2
        psrlq   MM1, 20
        por     MM0, MM1                    ; MM0 words, high to low: days, hours, mins, secs

        ; --- split every word into tens and units, convert to ASCII -------
        movq    MM1, MM0
        pmulhuw MM0, QWORD PTR [PwDiv10]    ; tens = value / 10 (author: checked for 0 to 99)
        movq    MM2, MM0
        pmullw  MM2, QWORD PTR [PwMul10]    ; tens * 10
        psllw   MM0, 8                      ; tens into the high byte of each word
        psubw   MM1, MM2                    ; units = value - tens * 10
        pand    MM1, QWORD PTR [Mask0Fh]
        pand    MM0, QWORD PTR [MaskF0h]
        por     MM0, MM1                    ; word = (tens << 8) | units
        paddw   MM0, QWORD PTR [Mask30h]    ; add "0" to both bytes

        ; --- store; BSWAP puts the tens character first in memory ---------
        movd    eax, MM0                    ; eax = mins word : secs word
        bswap   eax                         ; ax = minutes, upper half = seconds
        mov     WORD PTR [szMinL], ax
        shr     eax, 16
        mov     WORD PTR [szSecL], ax
        punpckhdq MM0, MM0                  ; bring the high dword down
        movd    eax, MM0                    ; eax = days word : hours word
        bswap   eax                         ; ax = days, upper half = hours
        mov     WORD PTR [szDaysL], ax
        shr     eax, 16
        mov     WORD PTR [szHoursL], ax

        emms                                ; leave MMX state so later x87 code works
        ret
Routine5 ENDP


lingo

"...paddq, pmuludq, and psubq are SSE2 instructions that will not run on my P3, and my P4 is not available."

OK, the SSE2 instructions are slower here... :lol
So, I rewrote my "general" assembly proc d2dtl  and I have new time: :lol
2604 cycles, Bin2Time
3362 cycles, UpTime
67 cycles, d2dtl-lingo
222 cycles, ConvertTime-NightWare
Press any key to exit...


Regards,
Lingo



[attachment deleted by admin]

NightWare


NightWare

here is the classic alternative (35 clock cycles on my cpu) :
.DATA
ALIGN 16
; Output template. The pieces are laid out back to back, so together they
; form the text "00 Days 00:00:00." followed by six zero bytes.
; GetElapsTime overwrites the first two characters of szDays, szHours,
; szMins and szSecs.
szDays  DB "00 Days "
szHours DB "00:"
szMins  DB "00:"
szSecs  DB "00."
szFrac  DB 0,0,0,0,0,0 ; not written by GetElapsTime; presumably room for the fraction digits plus a terminator


.CODE
ALIGN 16
;
; GetElapsTime -- split a millisecond tick count into days, hours, minutes
; and seconds and write each as two ASCII digits.
;
; Call:
;       mov     eax,Ticks
;       call    GetElapsTime
;
; In:    eax = tick count in milliseconds
; Out:   two characters (tens first) written to each of szDays, szHours,
;        szMins and szSecs
; Uses:  eax, ecx, edx; esi and edi are saved and restored
;
; No DIV is used. Each division is a MUL by a rounded-up reciprocal, taking
; the high dword of the product (and shifting further for the two reciprocals
; that are scaled by more than 2^32).

GET_RECIP_1000  EQU 2199023256          ; 2^41 / 1000, rounded up
GET_RECIP_3600  EQU 2443359173          ; 2^43 / 3600, rounded up
GET_RECIP_24    EQU 178956971           ; 2^32 / 24, rounded up
GET_RECIP_60    EQU 71582789            ; 2^32 / 60, rounded up
GET_RECIP_10    EQU 429496730           ; 2^32 / 10, rounded up
GET_ASCII_00    EQU 3030h               ; the characters "00"

; PutTwoDigits val32, val16, dest
;   Writes the value in val32 (expected to be below 100) to dest as two ASCII
;   characters, tens first. val16 must be the 16-bit name of val32.
;   Destroys eax, edx and val32.
PutTwoDigits MACRO val32:REQ, val16:REQ, dest:REQ
        mov     eax, GET_RECIP_10
        mul     val32                   ; edx = tens
        lea     eax, [edx+edx*4]
        add     eax, eax                ; eax = tens * 10
        add     edx, GET_ASCII_00       ; tens + "00"
        sub     val32, eax              ; val32 = units
        shl     val32, 8                ; units go in the second byte
        add     val32, edx              ; combine with the tens character
        mov     WORD PTR dest, val16
ENDM

GetElapsTime PROC
        push    esi
        push    edi

        ; total seconds = ticks / 1000
        ; (author: verified for 0 to FFFFFFFFh inclusive)
        mov     edx, GET_RECIP_1000
        mul     edx
        shr     edx, 9
        mov     edi, edx                ; edi = total seconds

        ; total hours = seconds / 3600
        ; (author: verified for 0 to 4320000 inclusive)
        mov     eax, GET_RECIP_3600
        mul     edx
        shr     edx, 11
        mov     esi, edx                ; esi = total hours

        ; seconds left over inside the current hour
        mov     eax, 3600
        mul     edx                     ; eax = hours * 3600
        sub     edi, eax                ; edi = minutes and seconds, in seconds

        ; days = hours / 24 (author: verified for 0 to 1200)
        mov     eax, GET_RECIP_24
        mul     esi
        mov     ecx, edx                ; ecx = days

        ; hours inside the current day
        lea     edx, [edx+edx*2]        ; days * 3
        shl     edx, 3                  ; days * 24
        sub     esi, edx                ; esi = hours

        PutTwoDigits ecx, cx, szDays
        PutTwoDigits esi, si, szHours

        ; minutes = remaining seconds / 60 (author: verified for 0 to 3600)
        mov     eax, GET_RECIP_60
        mul     edi
        mov     ecx, edx                ; ecx = minutes

        ; seconds inside the current minute
        lea     edx, [edx+edx*4]        ; minutes * 5
        lea     edx, [edx+edx*2]        ; minutes * 15
        shl     edx, 2                  ; minutes * 60
        sub     edi, edx                ; edi = seconds

        PutTwoDigits ecx, cx, szMins
        PutTwoDigits edi, di, szSecs

        pop     edi
        pop     esi
        ret
GetElapsTime ENDP


as you can see, there are operations repeated 4 times before writing the values. 4 times ? hmm... it sounds like work for simd, and here is the sse2 code (19 clock cycles on my cpu) :

.DATA
ALIGN 16
; Constants for Sse2_GetElapsTime. Each is a 64-bit value written as low
; dword, high dword, so it can be used directly as a QWORD memory operand.
Simd_Dw_Div_24 DWORD 178956971,0 ; 2^32 / 24 rounded up
Simd_Dw_Div_60 DWORD 71582789,0 ; 2^32 / 60 rounded up
Simd_Dw_Div_1000 DWORD 2199023256,0 ; 2^41 / 1000 rounded up
Simd_Dw_Div_3600 DWORD 2443359173,0 ; 2^43 / 3600 rounded up
Simd_Dw_Mul_24 DWORD 24,0 ; hours per day
Simd_Dw_Mul_60 DWORD 60,0 ; seconds per minute
Simd_Dw_Mul_3600 DWORD 3600,0 ; seconds per hour
Simd_Wds_Div_10 DWORD 199A199Ah,199A199Ah ; 199Ah in each word, used as a divide-by-10 multiplier
Simd_Wds_Mul_10 DWORD 000A000Ah,000A000Ah ; 10 in each word
Simd_Wds_Val_30h DWORD 30303030h,30303030h ; ASCII "0" in every byte
; Output template: the pieces are contiguous and read "00 Days 00:00:00."
; followed by six zero bytes.
szDays  DB "00 Days "
szHours DB "00:"
szMins  DB "00:"
szSecs  DB "00."
szFrac  DB 0,0,0,0,0,0 ; not written by Sse2_GetElapsTime


.CODE
ALIGN 16
;
; Sse2_GetElapsTime -- SIMD version: split a millisecond tick count into
; days, hours, minutes and seconds, then convert all four values to two
; ASCII digits each in one pass.
;
; Call:
;       mov     eax,Ticks
;       call    Sse2_GetElapsTime
;
; In:    eax = tick count in milliseconds
; Out:   two characters (tens first) written to each of szDays, szHours,
;        szMins and szSecs
; Uses:  eax (not preserved), MM0-MM5; EMMS is executed before returning so
;        that x87 code can run afterwards
; Needs: SSE2 (pmuludq on MMX registers)
;
Sse2_GetElapsTime PROC

        ; total seconds = ticks / 1000
        ; (author: verified for 0 to FFFFFFFFh)
        movd    MM0, eax                            ; MM0 = tick count
        pmuludq MM0, QWORD PTR Simd_Dw_Div_1000     ; 64-bit product with the reciprocal
        psrlq   MM0, 9+32                           ; take the high dword, then >> 9
        movq    MM3, MM0                            ; MM3 = total seconds

        ; total hours = seconds / 3600
        ; (author: verified for 0 to 4320000)
        pmuludq MM0, QWORD PTR Simd_Dw_Div_3600
        psrlq   MM0, 11+32                          ; take the high dword, then >> 11
        movq    MM1, MM0                            ; MM1 = total hours (days and hours)

        ; seconds left over inside the current hour
        pmuludq MM0, QWORD PTR Simd_Dw_Mul_3600     ; hours * 3600
        psubd   MM3, MM0                            ; MM3 = minutes and seconds, in seconds

        ; days = hours / 24 (author: verified for 0 to 1200)
        movq    MM4, QWORD PTR Simd_Dw_Div_24
        pmuludq MM4, MM1
        psrlq   MM4, 32                             ; take the high dword
        movq    MM2, MM4                            ; MM2 = days

        ; hours inside the current day
        pmullw  MM4, QWORD PTR Simd_Dw_Mul_24       ; days * 24 (word multiply; only the low word is non-zero)
        psubd   MM1, MM4                            ; MM1 = hours

        ; minutes = remaining seconds / 60
        movq    MM5, QWORD PTR Simd_Dw_Div_60
        pmuludq MM5, MM3
        psrlq   MM5, 32                             ; take the high dword
        movq    MM4, MM5                            ; MM4 = minutes

        ; seconds inside the current minute
        pmullw  MM5, QWORD PTR Simd_Dw_Mul_60       ; minutes * 60 (word multiply; only the low word is non-zero)
        psubd   MM3, MM5                            ; MM3 = seconds

        ; gather the four values so they can be processed in parallel
        punpckldq MM4, MM3                          ; MM4 = seconds (high dword), minutes (low dword)
        punpckldq MM2, MM1                          ; MM2 = hours (high dword), days (low dword)
        packuswb MM2, MM4                           ; MM2 words, high to low: seconds, minutes, hours, days

        movq    MM0, MM2                            ; MM0 = copy of the four values
        pmulhuw MM2, QWORD PTR Simd_Wds_Div_10      ; tens = value / 10 in each word

        movq    MM1, MM2                            ; MM1 = tens
        pmullw  MM2, QWORD PTR Simd_Wds_Mul_10      ; tens * 10, to be subtracted below

        paddw   MM1, QWORD PTR Simd_Wds_Val_30h     ; MM1 = tens + "00" in each word
        psubw   MM0, MM2                            ; MM0 = units
        psllw   MM0, 8                              ; units go in the second byte
        paddw   MM0, MM1                            ; each word = tens character, units character

        ; write the results
        movd    eax, MM0                            ; eax = hours word : days word
        mov     WORD PTR szDays, ax
        shr     eax, 16
        mov     WORD PTR szHours, ax
        psrlq   MM0, 32                             ; bring seconds and minutes down
        movd    eax, MM0                            ; eax = seconds word : minutes word
        mov     WORD PTR szMins, ax
        shr     eax, 16
        mov     WORD PTR szSecs, ax

        emms                                        ; leave MMX state so later x87 code works
        ret
Sse2_GetElapsTime ENDP


now, this topic is really a lesson, not because of the speed (it really doesn't matter, especially for an algo like this one...), not because of the mul/div substitutions, not because of the bits manipulations, not because of the simd usage or any other optimization hint. but because when you code clear algos, you are ABLE TO SEE ALL THE POSSIBILITIES. and it's the most essential thing, far before any optimization hint.  :8)


NightWare

i fully agree, but it's the result returned by the speed test on my core2...
now, the alternative was (like its name says) another way, and i didn't say it was faster than your algo... now the sse2 "slower" version IS the final algo for the speed test  :lol no manipulation from me here... everyone can include the algos in the test... and you can also count the clock cycles for this one too  :lol