I am looking for any documentation on the current sse instructions and their implementation in HLA. Any help or web addresses would be wonderful. Thank you very much for any help you can provide!
Quote from: Sundervine on August 31, 2008, 09:55:03 PM
I am looking for any documentation on the current sse instructions and their implementation in HLA. Any help or web addresses would be wonderful. Thank you very much for any help you can provide!
HLA supports all the SSE instructions up to (but not including) the very latest instructions (like blend*). HLA v1.105 will add support for the brand-new instructions just added.
I believe the HLA documentation is up-to-date on the instructions supported. Of course, it's only a reference providing the syntax; it does not describe how to actually use the instructions.
hLater,
Randy Hyde
For the general usage of SSE instructions, I guess the Intel manuals would be helpful (six free PDF files available for download).
Thank you very much for the information. I will download the Intel information as soon as I find it. Again, thank you both!
I noticed a few other people seem to be looking for this information so I will at least put the HLA Information here.
SSE Instructions
HLA supports the following SSE and SSE/2 instructions found on the Pentium III, IV, and later processors (note that some instructions are only available on Pentium IV and later processors; see the Intel reference manuals for details):
HLA uses the symbols xmm0, xmm1, ..., xmm7 for the SSE register set.
SSE Instrs:
addsd( sseReg/mem64, sseReg );
addpd( sseReg/mem128, sseReg );
addps( sseReg/mem128, sseReg );
addss( sseReg/mem32, sseReg );
andnpd( sseReg/mem128, sseReg );
andnps( sseReg/mem128, sseReg );
andpd( sseReg/mem128, sseReg );
andps( sseReg/mem128, sseReg );
clflush( mem8 );
cmppd( imm8, sseReg/mem128, sseReg );
cmpps( imm8, sseReg/mem128, sseReg );
cmpsdp( imm8, sseReg/mem64, sseReg );
cmpss( imm8, sseReg/mem32, sseReg );
cmpeqss( sseReg, sseReg );
cmpltss( sseReg, sseReg );
cmpless( sseReg, sseReg );
cmpneqss( sseReg, sseReg );
cmpnltss( sseReg, sseReg );
cmpnless( sseReg, sseReg );
cmpordss( sseReg, sseReg );
cmpunordss( sseReg, sseReg );
cmpeqsd( sseReg, sseReg );
cmpltsd( sseReg, sseReg );
cmplesd( sseReg, sseReg );
cmpneqsd( sseReg, sseReg );
cmpnltsd( sseReg, sseReg );
cmpnlesd( sseReg, sseReg );
cmpordsd( sseReg, sseReg );
cmpunordsd( sseReg, sseReg );
cmpeqps( sseReg, sseReg );
cmpltps( sseReg, sseReg );
cmpleps( sseReg, sseReg );
cmpneqps( sseReg, sseReg );
cmpnltps( sseReg, sseReg );
cmpnleps( sseReg, sseReg );
cmpordps( sseReg, sseReg );
cmpunordps( sseReg, sseReg );
cmpeqpd( sseReg, sseReg );
cmpltpd( sseReg, sseReg );
cmplepd( sseReg, sseReg );
cmpneqpd( sseReg, sseReg );
cmpnltpd( sseReg, sseReg );
cmpnlepd( sseReg, sseReg );
cmpordpd( sseReg, sseReg );
cmpunordpd( sseReg, sseReg );
comisd( sseReg/mem64, sseReg );
comiss( sseReg/mem32, sseReg );
cvtdq2pd( sseReg/mem64, sseReg );
cvtdq2pq
cvtdq2ps( sseReg/mem128, sseReg );
cvtpd2dq( sseReg/mem128, sseReg );
cvtpd2pi( sseReg/mem128, mmxReg );
cvtpd2ps( sseReg/mem128, sseReg );
cvtpi2pd( mmxReg/mem64, sseReg );
cvtpi2ps( mmxReg/mem64, sseReg );
cvtpi2ss
cvtps2dq( sseReg/mem128, sseReg );
cvtps2pd( sseReg/mem64, sseReg );
cvtps2pi( sseReg/mem64, mmxReg );
cvtsd2si( sseReg/mem64, Reg32 );
cvtsi2sd( Reg32/mem32, sseReg );
cvtsi2ss( Reg32/mem32, sseReg );
cvtss2sd( sseReg/mem32, sseReg );
cvtsd2ss( sseReg/mem64, sseReg );
cvtss2si( sseReg/mem32, Reg32 );
cvttpd2pi( sseReg/mem128, mmxReg );
cvttpd2dq( sseReg/mem128, sseReg );
cvttps2dq( sseReg/mem128, sseReg );
cvttps2pi( sseReg/mem64, mmxReg );
cvttsd2si( sseReg/mem64, Reg32 );
cvttss2si( sseReg/mem32, Reg32 );
divpd( sseReg/mem128, sseReg );
divps( sseReg/mem128, sseReg );
divsd( sseReg/mem64, sseReg );
divss( sseReg/mem32, sseReg );
fxsave( mem512 );
fxrstor( mem512 );
ldmxcsr( mem32 );
lfence;
maskmovdqu( sseReg, sseReg );
maskmovq( mmxReg, mmxReg );
maxpd( sseReg/mem128, sseReg );
maxps( sseReg/mem128, sseReg );
maxsd( sseReg/mem64, sseReg );
maxss( sseReg/mem32, sseReg );
mfence;
minpd( sseReg/mem128, sseReg );
minps( sseReg/mem128, sseReg );
minsd( sseReg/mem64, sseReg );
minss( sseReg/mem32, sseReg );
movapd( sseReg/mem128, sseReg );
movapd( sseReg, sseReg/mem128 );
movaps( sseReg/mem128, sseReg );
movaps( sseReg, sseReg/mem128 );
movdqa( sseReg/mem128, sseReg );
movdqa( sseReg, sseReg/mem128 );
movdqu( sseReg/mem128, sseReg );
movdqu( sseReg, sseReg/mem128 );
movdq2q( sseReg, mmxReg );
movhlps( sseReg, sseReg );
movhpd( mem64, sseReg );
movhpd( sseReg, mem64 );
movhps( mem64, sseReg );
movhps( sseReg, mem64 );
movlpd( mem64, sseReg );
movlpd( sseReg, mem64 );
movlps( mem64, sseReg );
movlps( sseReg, mem64 );
movlhps( sseReg, sseReg );
movmskpd( sseReg, Reg32 );
movmskps( sseReg, Reg32 );
movnti( Reg32, mem32 );
movntpd( sseReg, mem128 );
movntps( sseReg, mem128 );
movntq( mmxReg, mem64 );
movntdq( sseReg, mem128 );
movq2dq( mmxReg, sseReg );
movsdp( sseReg, sseReg );
movsdp( mem64, sseReg );
movsdp( sseReg, mem64 );
movss( sseReg, sseReg );
movss( mem32, sseReg );
movss( sseReg, mem32 );
movupd( sseReg, sseReg );
movupd( sseReg, mem128 );
movupd( mem128, sseReg );
movups( sseReg, sseReg );
movups( sseReg, mem128 );
movups( mem128, sseReg );
mulpd( sseReg/mem128, sseReg );
mulps( sseReg/mem128, sseReg );
mulss( sseReg/mem32, sseReg );
mulsd( sseReg/mem64, sseReg );
orpd( sseReg/mem128, sseReg );
orps( sseReg/mem128, sseReg );
pause;
pmuludq( mmxReg/mem64, mmxReg );
pmuludq( sseReg/mem128, sseReg );
prefetcht0( mem8 );
prefetcht1( mem8 );
prefetcht2( mem8 );
prefetchnta( mem8 );
pshufd( imm8, sseReg/mem128, sseReg );
pslldq( imm8, sseReg );
psrldq( imm8, sseReg );
punpckhqdq( sseReg/mem128, sseReg );
punpcklqdq( sseReg/mem128, sseReg );
rcpps( sseReg/mem128, sseReg );
rcpss( sseReg/mem32, sseReg );
rsqrtps( sseReg/mem128, sseReg );
rsqrtss( sseReg/mem32, sseReg );
sfence;
shufpd( imm8, sseReg/mem128, sseReg );
shufps( imm8, sseReg/mem128, sseReg );
sqrtpd( sseReg/mem128, sseReg );
sqrtps( sseReg/mem128, sseReg );
sqrtsd( sseReg/mem64, sseReg );
sqrtss( sseReg/mem32, sseReg );
stmxcsr( mem32 );
subps( sseReg/mem128, sseReg );
subpd( sseReg/mem128, sseReg );
subsd( sseReg/mem64, sseReg );
subss( sseReg/mem32, sseReg );
ucomisd( sseReg/mem64, sseReg );
ucomiss( sseReg/mem32, sseReg );
unpckhpd( sseReg/mem128, sseReg );
unpckhps( sseReg/mem128, sseReg );
unpcklpd( sseReg/mem128, sseReg );
unpcklps( sseReg/mem128, sseReg );
xorpd( sseReg/mem128, sseReg );
xorps( sseReg/mem128, sseReg );