The prototypes for Streaming SIMD Extensions intrinsics are in the xmmintrin.h header file.
Intrinsic Name | Operation | Corresponding Instruction |
---|---|---|
_mm_shuffle_ps | Shuffle | SHUFPS |
_mm_unpackhi_ps | Unpack High | UNPCKHPS |
_mm_unpacklo_ps | Unpack Low | UNPCKLPS |
_mm_loadh_pi | Load High | MOVHPS reg, mem |
_mm_storeh_pi | Store High | MOVHPS mem, reg |
_mm_movehl_ps | Move High to Low | MOVHLPS |
_mm_movelh_ps | Move Low to High | MOVLHPS |
_mm_loadl_pi | Load Low | MOVLPS reg, mem |
_mm_storel_pi | Store Low | MOVLPS mem, reg |
_mm_movemask_ps | Create four-bit mask | MOVMSKPS |
__m128 _mm_shuffle_ps(__m128 a, __m128 b, unsigned int imm8)
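Selects four specific single-precision, floating-point (SP FP) values from a and b, based on the mask imm8. The lower two values of the result are selected from a and the upper two from b. The mask must be an immediate. See Macro Function for Shuffle Using Streaming SIMD Extensions for a description of the shuffle semantics.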
__m128 _mm_unpackhi_ps(__m128 a, __m128 b)
Selects and interleaves the upper two SP FP values from a and b.
r0 := a2
r1 := b2
r2 := a3
r3 := b3
__m128 _mm_unpacklo_ps(__m128 a, __m128 b)
Selects and interleaves the lower two SP FP values from a and b.
r0 := a0
r1 := b0
r2 := a1
r3 := b1
__m128 _mm_loadh_pi(__m128 a, __m64 const *p)
Sets the upper two SP FP values with 64 bits of data loaded from the address p; the lower two values are passed through from a.
r0 := a0
r1 := a1
r2 := *p0
r3 := *p1
void _mm_storeh_pi(__m64 *p, __m128 a)
Stores the upper two SP FP values of a to the address p.
*p0 := a2
*p1 := a3
__m128 _mm_movehl_ps(__m128 a, __m128 b)
Moves the upper 2 SP FP values of b to the lower 2 SP FP values of the result. The upper 2 SP FP values of a are passed through to the result.
r3 := a3
r2 := a2
r1 := b3
r0 := b2
__m128 _mm_movelh_ps(__m128 a, __m128 b)
Moves the lower 2 SP FP values of b to the upper 2 SP FP values of the result. The lower 2 SP FP values of a are passed through to the result.
r3 := b1
r2 := b0
r1 := a1
r0 := a0
__m128 _mm_loadl_pi(__m128 a, __m64 const *p)
Sets the lower two SP FP values with 64 bits of data loaded from the address p; the upper two values are passed through from a.
r0 := *p0
r1 := *p1
r2 := a2
r3 := a3
void _mm_storel_pi(__m64 *p, __m128 a)
Stores the lower two SP FP values of a to the address p.
*p0 := a0
*p1 := a1
int _mm_movemask_ps(__m128 a)
Creates a 4-bit mask from the most significant bits of the four SP FP values.
r := sign(a3)<<3 | sign(a2)<<2 | sign(a1)<<1 | sign(a0)