xref: /freebsd-src/contrib/llvm-project/clang/lib/Headers/mmintrin.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
90b57cec5SDimitry Andric 
100b57cec5SDimitry Andric #ifndef __MMINTRIN_H
110b57cec5SDimitry Andric #define __MMINTRIN_H
120b57cec5SDimitry Andric 
/* Refuse to compile when the target is neither 32-bit nor 64-bit x86. */
#if !(defined(__i386__) || defined(__x86_64__))
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
16349cc55cSDimitry Andric 
/* Public 64-bit vector type: a single long long lane, 8 bytes wide and
 * 8-byte aligned. */
typedef long long __m64 __attribute__((__aligned__(8), __vector_size__(8)));

/* Internal 8-byte lane views used when casting __m64 operands. */
typedef long long __v1di __attribute__((__vector_size__(8))); /* 1 x long long */
typedef int __v2si __attribute__((__vector_size__(8)));       /* 2 x int */
typedef short __v4hi __attribute__((__vector_size__(8)));     /* 4 x short */
typedef char __v8qi __attribute__((__vector_size__(8)));      /* 8 x char */
230b57cec5SDimitry Andric 
/* Default attribute set applied to the intrinsic functions in this file:
 * always inlined, no debug info, "mmx,no-evex512" target features, and a
 * minimum vector width of 64. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("mmx,no-evex512"), __min_vector_width__(64)))
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric /// Clears the MMX state by setting the state of the x87 stack registers
300b57cec5SDimitry Andric ///    to empty.
310b57cec5SDimitry Andric ///
320b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
330b57cec5SDimitry Andric ///
340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> EMMS </c> instruction.
350b57cec5SDimitry Andric ///
365f757f3fSDimitry Andric static __inline__ void __attribute__((__always_inline__, __nodebug__,
375f757f3fSDimitry Andric                                       __target__("mmx,no-evex512")))
385f757f3fSDimitry Andric _mm_empty(void) {
390b57cec5SDimitry Andric   __builtin_ia32_emms();
400b57cec5SDimitry Andric }
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric /// Constructs a 64-bit integer vector, setting the lower 32 bits to the
430b57cec5SDimitry Andric ///    value of the 32-bit integer parameter and setting the upper 32 bits to 0.
440b57cec5SDimitry Andric ///
450b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
460b57cec5SDimitry Andric ///
470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD </c> instruction.
480b57cec5SDimitry Andric ///
490b57cec5SDimitry Andric /// \param __i
500b57cec5SDimitry Andric ///    A 32-bit integer value.
510b57cec5SDimitry Andric /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
520b57cec5SDimitry Andric ///    parameter. The upper 32 bits are set to 0.
530b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
540b57cec5SDimitry Andric _mm_cvtsi32_si64(int __i)
550b57cec5SDimitry Andric {
560b57cec5SDimitry Andric     return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
570b57cec5SDimitry Andric }
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
600b57cec5SDimitry Andric ///    signed integer.
610b57cec5SDimitry Andric ///
620b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
630b57cec5SDimitry Andric ///
640b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD </c> instruction.
650b57cec5SDimitry Andric ///
660b57cec5SDimitry Andric /// \param __m
670b57cec5SDimitry Andric ///    A 64-bit integer vector.
680b57cec5SDimitry Andric /// \returns A 32-bit signed integer value containing the lower 32 bits of the
690b57cec5SDimitry Andric ///    parameter.
700b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS
710b57cec5SDimitry Andric _mm_cvtsi64_si32(__m64 __m)
720b57cec5SDimitry Andric {
730b57cec5SDimitry Andric     return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
740b57cec5SDimitry Andric }
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric /// Casts a 64-bit signed integer value into a 64-bit integer vector.
770b57cec5SDimitry Andric ///
780b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
790b57cec5SDimitry Andric ///
800b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVQ </c> instruction.
810b57cec5SDimitry Andric ///
820b57cec5SDimitry Andric /// \param __i
830b57cec5SDimitry Andric ///    A 64-bit signed integer.
840b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the same bitwise pattern as the
850b57cec5SDimitry Andric ///    parameter.
860b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
870b57cec5SDimitry Andric _mm_cvtsi64_m64(long long __i)
880b57cec5SDimitry Andric {
890b57cec5SDimitry Andric     return (__m64)__i;
900b57cec5SDimitry Andric }
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric /// Casts a 64-bit integer vector into a 64-bit signed integer value.
930b57cec5SDimitry Andric ///
940b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
950b57cec5SDimitry Andric ///
960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVQ </c> instruction.
970b57cec5SDimitry Andric ///
980b57cec5SDimitry Andric /// \param __m
990b57cec5SDimitry Andric ///    A 64-bit integer vector.
1000b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the same bitwise pattern as the
1010b57cec5SDimitry Andric ///    parameter.
1020b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS
1030b57cec5SDimitry Andric _mm_cvtm64_si64(__m64 __m)
1040b57cec5SDimitry Andric {
1050b57cec5SDimitry Andric     return (long long)__m;
1060b57cec5SDimitry Andric }
1070b57cec5SDimitry Andric 
108*0fca6ea1SDimitry Andric /// Converts, with saturation, 16-bit signed integers from both 64-bit integer
109*0fca6ea1SDimitry Andric ///    vector parameters of [4 x i16] into 8-bit signed integer values, and
110*0fca6ea1SDimitry Andric ///    constructs a 64-bit integer vector of [8 x i8] as the result.
111*0fca6ea1SDimitry Andric ///
112*0fca6ea1SDimitry Andric ///    Positive values greater than 0x7F are saturated to 0x7F. Negative values
113*0fca6ea1SDimitry Andric ///    less than 0x80 are saturated to 0x80.
1140b57cec5SDimitry Andric ///
1150b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1160b57cec5SDimitry Andric ///
1170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
1180b57cec5SDimitry Andric ///
1190b57cec5SDimitry Andric /// \param __m1
120*0fca6ea1SDimitry Andric ///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
121*0fca6ea1SDimitry Andric ///    written to the lower 32 bits of the result.
1220b57cec5SDimitry Andric /// \param __m2
123*0fca6ea1SDimitry Andric ///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
124*0fca6ea1SDimitry Andric ///    written to the upper 32 bits of the result.
1250b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the converted
1260b57cec5SDimitry Andric ///    values.
1270b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
1280b57cec5SDimitry Andric _mm_packs_pi16(__m64 __m1, __m64 __m2)
1290b57cec5SDimitry Andric {
1300b57cec5SDimitry Andric     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
1310b57cec5SDimitry Andric }
1320b57cec5SDimitry Andric 
133*0fca6ea1SDimitry Andric /// Converts, with saturation, 32-bit signed integers from both 64-bit integer
134*0fca6ea1SDimitry Andric ///    vector parameters of [2 x i32] into 16-bit signed integer values, and
135*0fca6ea1SDimitry Andric ///    constructs a 64-bit integer vector of [4 x i16] as the result.
136*0fca6ea1SDimitry Andric ///
137*0fca6ea1SDimitry Andric ///    Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
138*0fca6ea1SDimitry Andric ///    values less than 0x8000 are saturated to 0x8000.
1390b57cec5SDimitry Andric ///
1400b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1410b57cec5SDimitry Andric ///
1420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
1430b57cec5SDimitry Andric ///
1440b57cec5SDimitry Andric /// \param __m1
145*0fca6ea1SDimitry Andric ///    A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
146*0fca6ea1SDimitry Andric ///    written to the lower 32 bits of the result.
1470b57cec5SDimitry Andric /// \param __m2
148*0fca6ea1SDimitry Andric ///    A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
149*0fca6ea1SDimitry Andric ///    written to the upper 32 bits of the result.
1500b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the converted
1510b57cec5SDimitry Andric ///    values.
1520b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
1530b57cec5SDimitry Andric _mm_packs_pi32(__m64 __m1, __m64 __m2)
1540b57cec5SDimitry Andric {
1550b57cec5SDimitry Andric     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
1560b57cec5SDimitry Andric }
1570b57cec5SDimitry Andric 
158*0fca6ea1SDimitry Andric /// Converts, with saturation, 16-bit signed integers from both 64-bit integer
159*0fca6ea1SDimitry Andric ///    vector parameters of [4 x i16] into 8-bit unsigned integer values, and
160*0fca6ea1SDimitry Andric ///    constructs a 64-bit integer vector of [8 x i8] as the result.
161*0fca6ea1SDimitry Andric ///
162*0fca6ea1SDimitry Andric ///    Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
163*0fca6ea1SDimitry Andric ///    saturated to 0.
1640b57cec5SDimitry Andric ///
1650b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1660b57cec5SDimitry Andric ///
1670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
1680b57cec5SDimitry Andric ///
1690b57cec5SDimitry Andric /// \param __m1
170*0fca6ea1SDimitry Andric ///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
171*0fca6ea1SDimitry Andric ///    written to the lower 32 bits of the result.
1720b57cec5SDimitry Andric /// \param __m2
173*0fca6ea1SDimitry Andric ///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
174*0fca6ea1SDimitry Andric ///    written to the upper 32 bits of the result.
1750b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the converted
1760b57cec5SDimitry Andric ///    values.
1770b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
1780b57cec5SDimitry Andric _mm_packs_pu16(__m64 __m1, __m64 __m2)
1790b57cec5SDimitry Andric {
1800b57cec5SDimitry Andric     return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
1810b57cec5SDimitry Andric }
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
1840b57cec5SDimitry Andric ///    and interleaves them into a 64-bit integer vector of [8 x i8].
1850b57cec5SDimitry Andric ///
1860b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1870b57cec5SDimitry Andric ///
1880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
1890b57cec5SDimitry Andric ///
1900b57cec5SDimitry Andric /// \param __m1
1910b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8]. \n
1920b57cec5SDimitry Andric ///    Bits [39:32] are written to bits [7:0] of the result. \n
1930b57cec5SDimitry Andric ///    Bits [47:40] are written to bits [23:16] of the result. \n
1940b57cec5SDimitry Andric ///    Bits [55:48] are written to bits [39:32] of the result. \n
1950b57cec5SDimitry Andric ///    Bits [63:56] are written to bits [55:48] of the result.
1960b57cec5SDimitry Andric /// \param __m2
1970b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
1980b57cec5SDimitry Andric ///    Bits [39:32] are written to bits [15:8] of the result. \n
1990b57cec5SDimitry Andric ///    Bits [47:40] are written to bits [31:24] of the result. \n
2000b57cec5SDimitry Andric ///    Bits [55:48] are written to bits [47:40] of the result. \n
2010b57cec5SDimitry Andric ///    Bits [63:56] are written to bits [63:56] of the result.
2020b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
2030b57cec5SDimitry Andric ///    values.
2040b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
2050b57cec5SDimitry Andric _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
2060b57cec5SDimitry Andric {
2070b57cec5SDimitry Andric     return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
2080b57cec5SDimitry Andric }
2090b57cec5SDimitry Andric 
2100b57cec5SDimitry Andric /// Unpacks the upper 32 bits from two 64-bit integer vectors of
2110b57cec5SDimitry Andric ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
2120b57cec5SDimitry Andric ///
2130b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2140b57cec5SDimitry Andric ///
2150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
2160b57cec5SDimitry Andric ///
2170b57cec5SDimitry Andric /// \param __m1
2180b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
2190b57cec5SDimitry Andric ///    Bits [47:32] are written to bits [15:0] of the result. \n
2200b57cec5SDimitry Andric ///    Bits [63:48] are written to bits [47:32] of the result.
2210b57cec5SDimitry Andric /// \param __m2
2220b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
2230b57cec5SDimitry Andric ///    Bits [47:32] are written to bits [31:16] of the result. \n
2240b57cec5SDimitry Andric ///    Bits [63:48] are written to bits [63:48] of the result.
2250b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
2260b57cec5SDimitry Andric ///    values.
2270b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
2280b57cec5SDimitry Andric _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
2290b57cec5SDimitry Andric {
2300b57cec5SDimitry Andric     return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
2310b57cec5SDimitry Andric }
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric /// Unpacks the upper 32 bits from two 64-bit integer vectors of
2340b57cec5SDimitry Andric ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
2350b57cec5SDimitry Andric ///
2360b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2370b57cec5SDimitry Andric ///
2380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
2390b57cec5SDimitry Andric ///
2400b57cec5SDimitry Andric /// \param __m1
2410b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
2420b57cec5SDimitry Andric ///    the lower 32 bits of the result.
2430b57cec5SDimitry Andric /// \param __m2
2440b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
2450b57cec5SDimitry Andric ///    the upper 32 bits of the result.
2460b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
2470b57cec5SDimitry Andric ///    values.
2480b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
2490b57cec5SDimitry Andric _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
2500b57cec5SDimitry Andric {
2510b57cec5SDimitry Andric     return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
2520b57cec5SDimitry Andric }
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
2550b57cec5SDimitry Andric ///    and interleaves them into a 64-bit integer vector of [8 x i8].
2560b57cec5SDimitry Andric ///
2570b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2580b57cec5SDimitry Andric ///
2590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
2600b57cec5SDimitry Andric ///
2610b57cec5SDimitry Andric /// \param __m1
2620b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
2630b57cec5SDimitry Andric ///    Bits [7:0] are written to bits [7:0] of the result. \n
2640b57cec5SDimitry Andric ///    Bits [15:8] are written to bits [23:16] of the result. \n
2650b57cec5SDimitry Andric ///    Bits [23:16] are written to bits [39:32] of the result. \n
2660b57cec5SDimitry Andric ///    Bits [31:24] are written to bits [55:48] of the result.
2670b57cec5SDimitry Andric /// \param __m2
2680b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
2690b57cec5SDimitry Andric ///    Bits [7:0] are written to bits [15:8] of the result. \n
2700b57cec5SDimitry Andric ///    Bits [15:8] are written to bits [31:24] of the result. \n
2710b57cec5SDimitry Andric ///    Bits [23:16] are written to bits [47:40] of the result. \n
2720b57cec5SDimitry Andric ///    Bits [31:24] are written to bits [63:56] of the result.
2730b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
2740b57cec5SDimitry Andric ///    values.
2750b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
2760b57cec5SDimitry Andric _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
2770b57cec5SDimitry Andric {
2780b57cec5SDimitry Andric     return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric /// Unpacks the lower 32 bits from two 64-bit integer vectors of
2820b57cec5SDimitry Andric ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
2830b57cec5SDimitry Andric ///
2840b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2850b57cec5SDimitry Andric ///
2860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
2870b57cec5SDimitry Andric ///
2880b57cec5SDimitry Andric /// \param __m1
2890b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
2900b57cec5SDimitry Andric ///    Bits [15:0] are written to bits [15:0] of the result. \n
2910b57cec5SDimitry Andric ///    Bits [31:16] are written to bits [47:32] of the result.
2920b57cec5SDimitry Andric /// \param __m2
2930b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
2940b57cec5SDimitry Andric ///    Bits [15:0] are written to bits [31:16] of the result. \n
2950b57cec5SDimitry Andric ///    Bits [31:16] are written to bits [63:48] of the result.
2960b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
2970b57cec5SDimitry Andric ///    values.
2980b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
2990b57cec5SDimitry Andric _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
3000b57cec5SDimitry Andric {
3010b57cec5SDimitry Andric     return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
3020b57cec5SDimitry Andric }
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric /// Unpacks the lower 32 bits from two 64-bit integer vectors of
3050b57cec5SDimitry Andric ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
3060b57cec5SDimitry Andric ///
3070b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3080b57cec5SDimitry Andric ///
3090b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
3100b57cec5SDimitry Andric ///
3110b57cec5SDimitry Andric /// \param __m1
3120b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
3130b57cec5SDimitry Andric ///    the lower 32 bits of the result.
3140b57cec5SDimitry Andric /// \param __m2
3150b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
3160b57cec5SDimitry Andric ///    the upper 32 bits of the result.
3170b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
3180b57cec5SDimitry Andric ///    values.
3190b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
3200b57cec5SDimitry Andric _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
3210b57cec5SDimitry Andric {
3220b57cec5SDimitry Andric     return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
3230b57cec5SDimitry Andric }
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric /// Adds each 8-bit integer element of the first 64-bit integer vector
3260b57cec5SDimitry Andric ///    of [8 x i8] to the corresponding 8-bit integer element of the second
3270b57cec5SDimitry Andric ///    64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
3280b57cec5SDimitry Andric ///    packed into a 64-bit integer vector of [8 x i8].
3290b57cec5SDimitry Andric ///
3300b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3310b57cec5SDimitry Andric ///
3320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDB </c> instruction.
3330b57cec5SDimitry Andric ///
3340b57cec5SDimitry Andric /// \param __m1
3350b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
3360b57cec5SDimitry Andric /// \param __m2
3370b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
3380b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
3390b57cec5SDimitry Andric ///    parameters.
3400b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
3410b57cec5SDimitry Andric _mm_add_pi8(__m64 __m1, __m64 __m2)
3420b57cec5SDimitry Andric {
3430b57cec5SDimitry Andric     return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
3440b57cec5SDimitry Andric }
3450b57cec5SDimitry Andric 
3460b57cec5SDimitry Andric /// Adds each 16-bit integer element of the first 64-bit integer vector
3470b57cec5SDimitry Andric ///    of [4 x i16] to the corresponding 16-bit integer element of the second
3480b57cec5SDimitry Andric ///    64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
3490b57cec5SDimitry Andric ///    packed into a 64-bit integer vector of [4 x i16].
3500b57cec5SDimitry Andric ///
3510b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3520b57cec5SDimitry Andric ///
3530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDW </c> instruction.
3540b57cec5SDimitry Andric ///
3550b57cec5SDimitry Andric /// \param __m1
3560b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
3570b57cec5SDimitry Andric /// \param __m2
3580b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
3590b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
3600b57cec5SDimitry Andric ///    parameters.
3610b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
3620b57cec5SDimitry Andric _mm_add_pi16(__m64 __m1, __m64 __m2)
3630b57cec5SDimitry Andric {
3640b57cec5SDimitry Andric     return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
3650b57cec5SDimitry Andric }
3660b57cec5SDimitry Andric 
3670b57cec5SDimitry Andric /// Adds each 32-bit integer element of the first 64-bit integer vector
3680b57cec5SDimitry Andric ///    of [2 x i32] to the corresponding 32-bit integer element of the second
3690b57cec5SDimitry Andric ///    64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
3700b57cec5SDimitry Andric ///    packed into a 64-bit integer vector of [2 x i32].
3710b57cec5SDimitry Andric ///
3720b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3730b57cec5SDimitry Andric ///
3740b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDD </c> instruction.
3750b57cec5SDimitry Andric ///
3760b57cec5SDimitry Andric /// \param __m1
3770b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
3780b57cec5SDimitry Andric /// \param __m2
3790b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
3800b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
3810b57cec5SDimitry Andric ///    parameters.
3820b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
3830b57cec5SDimitry Andric _mm_add_pi32(__m64 __m1, __m64 __m2)
3840b57cec5SDimitry Andric {
3850b57cec5SDimitry Andric     return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
3860b57cec5SDimitry Andric }
3870b57cec5SDimitry Andric 
388*0fca6ea1SDimitry Andric /// Adds, with saturation, each 8-bit signed integer element of the first
389*0fca6ea1SDimitry Andric ///    64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
390*0fca6ea1SDimitry Andric ///    integer element of the second 64-bit integer vector of [8 x i8].
391*0fca6ea1SDimitry Andric ///
392*0fca6ea1SDimitry Andric ///    Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
393*0fca6ea1SDimitry Andric ///    less than 0x80 are saturated to 0x80. The results are packed into a
394*0fca6ea1SDimitry Andric ///    64-bit integer vector of [8 x i8].
3950b57cec5SDimitry Andric ///
3960b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3970b57cec5SDimitry Andric ///
3980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDSB </c> instruction.
3990b57cec5SDimitry Andric ///
4000b57cec5SDimitry Andric /// \param __m1
4010b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
4020b57cec5SDimitry Andric /// \param __m2
4030b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
4040b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
4050b57cec5SDimitry Andric ///    of both parameters.
4060b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
4070b57cec5SDimitry Andric _mm_adds_pi8(__m64 __m1, __m64 __m2)
4080b57cec5SDimitry Andric {
4090b57cec5SDimitry Andric     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
4100b57cec5SDimitry Andric }
4110b57cec5SDimitry Andric 
412*0fca6ea1SDimitry Andric /// Adds, with saturation, each 16-bit signed integer element of the first
413*0fca6ea1SDimitry Andric ///    64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
414*0fca6ea1SDimitry Andric ///    integer element of the second 64-bit integer vector of [4 x i16].
415*0fca6ea1SDimitry Andric ///
416*0fca6ea1SDimitry Andric ///    Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
417*0fca6ea1SDimitry Andric ///    less than 0x8000 are saturated to 0x8000. The results are packed into a
418*0fca6ea1SDimitry Andric ///    64-bit integer vector of [4 x i16].
4190b57cec5SDimitry Andric ///
4200b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4210b57cec5SDimitry Andric ///
4220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDSW </c> instruction.
4230b57cec5SDimitry Andric ///
4240b57cec5SDimitry Andric /// \param __m1
4250b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
4260b57cec5SDimitry Andric /// \param __m2
4270b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
4280b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
4290b57cec5SDimitry Andric ///    of both parameters.
4300b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
4310b57cec5SDimitry Andric _mm_adds_pi16(__m64 __m1, __m64 __m2)
4320b57cec5SDimitry Andric {
4330b57cec5SDimitry Andric     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
4340b57cec5SDimitry Andric }
4350b57cec5SDimitry Andric 
436*0fca6ea1SDimitry Andric /// Adds, with saturation, each 8-bit unsigned integer element of the first
437*0fca6ea1SDimitry Andric ///    64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
438*0fca6ea1SDimitry Andric ///    integer element of the second 64-bit integer vector of [8 x i8].
439*0fca6ea1SDimitry Andric ///
440*0fca6ea1SDimitry Andric ///    Sums greater than 0xFF are saturated to 0xFF. The results are packed
441*0fca6ea1SDimitry Andric ///    into a 64-bit integer vector of [8 x i8].
4420b57cec5SDimitry Andric ///
4430b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4440b57cec5SDimitry Andric ///
4450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
4460b57cec5SDimitry Andric ///
4470b57cec5SDimitry Andric /// \param __m1
4480b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
4490b57cec5SDimitry Andric /// \param __m2
4500b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
4510b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
4520b57cec5SDimitry Andric ///    unsigned sums of both parameters.
4530b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
4540b57cec5SDimitry Andric _mm_adds_pu8(__m64 __m1, __m64 __m2)
4550b57cec5SDimitry Andric {
4560b57cec5SDimitry Andric     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
4570b57cec5SDimitry Andric }
4580b57cec5SDimitry Andric 
459*0fca6ea1SDimitry Andric /// Adds, with saturation, each 16-bit unsigned integer element of the first
460*0fca6ea1SDimitry Andric ///    64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
461*0fca6ea1SDimitry Andric ///    integer element of the second 64-bit integer vector of [4 x i16].
462*0fca6ea1SDimitry Andric ///
463*0fca6ea1SDimitry Andric ///    Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
464*0fca6ea1SDimitry Andric ///    into a 64-bit integer vector of [4 x i16].
4650b57cec5SDimitry Andric ///
4660b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4670b57cec5SDimitry Andric ///
4680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
4690b57cec5SDimitry Andric ///
4700b57cec5SDimitry Andric /// \param __m1
4710b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
4720b57cec5SDimitry Andric /// \param __m2
4730b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
4740b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
4750b57cec5SDimitry Andric ///    unsigned sums of both parameters.
4760b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
4770b57cec5SDimitry Andric _mm_adds_pu16(__m64 __m1, __m64 __m2)
4780b57cec5SDimitry Andric {
4790b57cec5SDimitry Andric     return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
4800b57cec5SDimitry Andric }
4810b57cec5SDimitry Andric 
4820b57cec5SDimitry Andric /// Subtracts each 8-bit integer element of the second 64-bit integer
4830b57cec5SDimitry Andric ///    vector of [8 x i8] from the corresponding 8-bit integer element of the
4840b57cec5SDimitry Andric ///    first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
4850b57cec5SDimitry Andric ///    are packed into a 64-bit integer vector of [8 x i8].
4860b57cec5SDimitry Andric ///
4870b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4880b57cec5SDimitry Andric ///
4890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBB </c> instruction.
4900b57cec5SDimitry Andric ///
4910b57cec5SDimitry Andric /// \param __m1
4920b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8] containing the minuends.
4930b57cec5SDimitry Andric /// \param __m2
4940b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
4950b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the differences of
4960b57cec5SDimitry Andric ///    both parameters.
4970b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
4980b57cec5SDimitry Andric _mm_sub_pi8(__m64 __m1, __m64 __m2)
4990b57cec5SDimitry Andric {
5000b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
5010b57cec5SDimitry Andric }
5020b57cec5SDimitry Andric 
5030b57cec5SDimitry Andric /// Subtracts each 16-bit integer element of the second 64-bit integer
5040b57cec5SDimitry Andric ///    vector of [4 x i16] from the corresponding 16-bit integer element of the
5050b57cec5SDimitry Andric ///    first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
5060b57cec5SDimitry Andric ///    results are packed into a 64-bit integer vector of [4 x i16].
5070b57cec5SDimitry Andric ///
5080b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5090b57cec5SDimitry Andric ///
5100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBW </c> instruction.
5110b57cec5SDimitry Andric ///
5120b57cec5SDimitry Andric /// \param __m1
5130b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16] containing the minuends.
5140b57cec5SDimitry Andric /// \param __m2
5150b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
5160b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the differences of
5170b57cec5SDimitry Andric ///    both parameters.
5180b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
5190b57cec5SDimitry Andric _mm_sub_pi16(__m64 __m1, __m64 __m2)
5200b57cec5SDimitry Andric {
5210b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
5220b57cec5SDimitry Andric }
5230b57cec5SDimitry Andric 
5240b57cec5SDimitry Andric /// Subtracts each 32-bit integer element of the second 64-bit integer
5250b57cec5SDimitry Andric ///    vector of [2 x i32] from the corresponding 32-bit integer element of the
5260b57cec5SDimitry Andric ///    first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
5270b57cec5SDimitry Andric ///    results are packed into a 64-bit integer vector of [2 x i32].
5280b57cec5SDimitry Andric ///
5290b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5300b57cec5SDimitry Andric ///
5310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBD </c> instruction.
5320b57cec5SDimitry Andric ///
5330b57cec5SDimitry Andric /// \param __m1
5340b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32] containing the minuends.
5350b57cec5SDimitry Andric /// \param __m2
5360b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32] containing the subtrahends.
5370b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the differences of
5380b57cec5SDimitry Andric ///    both parameters.
5390b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
5400b57cec5SDimitry Andric _mm_sub_pi32(__m64 __m1, __m64 __m2)
5410b57cec5SDimitry Andric {
5420b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
5430b57cec5SDimitry Andric }
5440b57cec5SDimitry Andric 
545*0fca6ea1SDimitry Andric /// Subtracts, with saturation, each 8-bit signed integer element of the second
546*0fca6ea1SDimitry Andric ///    64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
547*0fca6ea1SDimitry Andric ///    integer element of the first 64-bit integer vector of [8 x i8].
548*0fca6ea1SDimitry Andric ///
549*0fca6ea1SDimitry Andric ///    Positive results greater than 0x7F are saturated to 0x7F. Negative
550*0fca6ea1SDimitry Andric ///    results less than 0x80 are saturated to 0x80. The results are packed
551*0fca6ea1SDimitry Andric ///    into a 64-bit integer vector of [8 x i8].
5520b57cec5SDimitry Andric ///
5530b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5540b57cec5SDimitry Andric ///
5550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
5560b57cec5SDimitry Andric ///
5570b57cec5SDimitry Andric /// \param __m1
5580b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8] containing the minuends.
5590b57cec5SDimitry Andric /// \param __m2
5600b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
5610b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
5620b57cec5SDimitry Andric ///    differences of both parameters.
5630b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
5640b57cec5SDimitry Andric _mm_subs_pi8(__m64 __m1, __m64 __m2)
5650b57cec5SDimitry Andric {
5660b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
5670b57cec5SDimitry Andric }
5680b57cec5SDimitry Andric 
569*0fca6ea1SDimitry Andric /// Subtracts, with saturation, each 16-bit signed integer element of the
570*0fca6ea1SDimitry Andric ///    second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
571*0fca6ea1SDimitry Andric ///    signed integer element of the first 64-bit integer vector of [4 x i16].
572*0fca6ea1SDimitry Andric ///
573*0fca6ea1SDimitry Andric ///    Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative
574*0fca6ea1SDimitry Andric ///    results less than 0x8000 are saturated to 0x8000. The results are packed
575*0fca6ea1SDimitry Andric ///    into a 64-bit integer vector of [4 x i16].
5760b57cec5SDimitry Andric ///
5770b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5780b57cec5SDimitry Andric ///
5790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
5800b57cec5SDimitry Andric ///
5810b57cec5SDimitry Andric /// \param __m1
5820b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16] containing the minuends.
5830b57cec5SDimitry Andric /// \param __m2
5840b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
5850b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
5860b57cec5SDimitry Andric ///    differences of both parameters.
5870b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
5880b57cec5SDimitry Andric _mm_subs_pi16(__m64 __m1, __m64 __m2)
5890b57cec5SDimitry Andric {
5900b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
5910b57cec5SDimitry Andric }
5920b57cec5SDimitry Andric 
5930b57cec5SDimitry Andric /// Subtracts each 8-bit unsigned integer element of the second 64-bit
5940b57cec5SDimitry Andric ///    integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
5950b57cec5SDimitry Andric ///    element of the first 64-bit integer vector of [8 x i8].
5960b57cec5SDimitry Andric ///
5970b57cec5SDimitry Andric ///    If an element of the first vector is less than the corresponding element
5980b57cec5SDimitry Andric ///    of the second vector, the result is saturated to 0. The results are
5990b57cec5SDimitry Andric ///    packed into a 64-bit integer vector of [8 x i8].
6000b57cec5SDimitry Andric ///
6010b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6020b57cec5SDimitry Andric ///
6030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
6040b57cec5SDimitry Andric ///
6050b57cec5SDimitry Andric /// \param __m1
6060b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8] containing the minuends.
6070b57cec5SDimitry Andric /// \param __m2
6080b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
6090b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
6100b57cec5SDimitry Andric ///    differences of both parameters.
6110b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
6120b57cec5SDimitry Andric _mm_subs_pu8(__m64 __m1, __m64 __m2)
6130b57cec5SDimitry Andric {
6140b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
6150b57cec5SDimitry Andric }
6160b57cec5SDimitry Andric 
6170b57cec5SDimitry Andric /// Subtracts each 16-bit unsigned integer element of the second 64-bit
6180b57cec5SDimitry Andric ///    integer vector of [4 x i16] from the corresponding 16-bit unsigned
6190b57cec5SDimitry Andric ///    integer element of the first 64-bit integer vector of [4 x i16].
6200b57cec5SDimitry Andric ///
6210b57cec5SDimitry Andric ///    If an element of the first vector is less than the corresponding element
6220b57cec5SDimitry Andric ///    of the second vector, the result is saturated to 0. The results are
6230b57cec5SDimitry Andric ///    packed into a 64-bit integer vector of [4 x i16].
6240b57cec5SDimitry Andric ///
6250b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6260b57cec5SDimitry Andric ///
6270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
6280b57cec5SDimitry Andric ///
6290b57cec5SDimitry Andric /// \param __m1
6300b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16] containing the minuends.
6310b57cec5SDimitry Andric /// \param __m2
6320b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
6330b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
6340b57cec5SDimitry Andric ///    differences of both parameters.
6350b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
6360b57cec5SDimitry Andric _mm_subs_pu16(__m64 __m1, __m64 __m2)
6370b57cec5SDimitry Andric {
6380b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
6390b57cec5SDimitry Andric }
6400b57cec5SDimitry Andric 
6410b57cec5SDimitry Andric /// Multiplies each 16-bit signed integer element of the first 64-bit
6420b57cec5SDimitry Andric ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
6430b57cec5SDimitry Andric ///    element of the second 64-bit integer vector of [4 x i16] and get four
6440b57cec5SDimitry Andric ///    32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
6450b57cec5SDimitry Andric ///    The lower 32 bits of these two sums are packed into a 64-bit integer
6460b57cec5SDimitry Andric ///    vector of [2 x i32].
6470b57cec5SDimitry Andric ///
6480b57cec5SDimitry Andric ///    For example, bits [15:0] of both parameters are multiplied, bits [31:16]
6490b57cec5SDimitry Andric ///    of both parameters are multiplied, and the sum of both results is written
6500b57cec5SDimitry Andric ///    to bits [31:0] of the result.
6510b57cec5SDimitry Andric ///
6520b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6530b57cec5SDimitry Andric ///
6540b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
6550b57cec5SDimitry Andric ///
6560b57cec5SDimitry Andric /// \param __m1
6570b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
6580b57cec5SDimitry Andric /// \param __m2
6590b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
6600b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the sums of
6610b57cec5SDimitry Andric ///    products of both parameters.
6620b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
6630b57cec5SDimitry Andric _mm_madd_pi16(__m64 __m1, __m64 __m2)
6640b57cec5SDimitry Andric {
6650b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
6660b57cec5SDimitry Andric }
6670b57cec5SDimitry Andric 
6680b57cec5SDimitry Andric /// Multiplies each 16-bit signed integer element of the first 64-bit
6690b57cec5SDimitry Andric ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
6700b57cec5SDimitry Andric ///    element of the second 64-bit integer vector of [4 x i16]. Packs the upper
6710b57cec5SDimitry Andric ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
6720b57cec5SDimitry Andric ///
6730b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6740b57cec5SDimitry Andric ///
6750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULHW </c> instruction.
6760b57cec5SDimitry Andric ///
6770b57cec5SDimitry Andric /// \param __m1
6780b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
6790b57cec5SDimitry Andric /// \param __m2
6800b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
6810b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
6820b57cec5SDimitry Andric ///    of the products of both parameters.
6830b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
6840b57cec5SDimitry Andric _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
6850b57cec5SDimitry Andric {
6860b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
6870b57cec5SDimitry Andric }
6880b57cec5SDimitry Andric 
6890b57cec5SDimitry Andric /// Multiplies each 16-bit signed integer element of the first 64-bit
6900b57cec5SDimitry Andric ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
6910b57cec5SDimitry Andric ///    element of the second 64-bit integer vector of [4 x i16]. Packs the lower
6920b57cec5SDimitry Andric ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
6930b57cec5SDimitry Andric ///
6940b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6950b57cec5SDimitry Andric ///
6960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULLW </c> instruction.
6970b57cec5SDimitry Andric ///
6980b57cec5SDimitry Andric /// \param __m1
6990b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
7000b57cec5SDimitry Andric /// \param __m2
7010b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
7020b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
7030b57cec5SDimitry Andric ///    of the products of both parameters.
7040b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
7050b57cec5SDimitry Andric _mm_mullo_pi16(__m64 __m1, __m64 __m2)
7060b57cec5SDimitry Andric {
7070b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
7080b57cec5SDimitry Andric }
7090b57cec5SDimitry Andric 
7100b57cec5SDimitry Andric /// Left-shifts each 16-bit signed integer element of the first
7110b57cec5SDimitry Andric ///    parameter, which is a 64-bit integer vector of [4 x i16], by the number
7120b57cec5SDimitry Andric ///    of bits specified by the second parameter, which is a 64-bit integer. The
7130b57cec5SDimitry Andric ///    lower 16 bits of the results are packed into a 64-bit integer vector of
7140b57cec5SDimitry Andric ///    [4 x i16].
7150b57cec5SDimitry Andric ///
7160b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
7170b57cec5SDimitry Andric ///
7180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
7190b57cec5SDimitry Andric ///
7200b57cec5SDimitry Andric /// \param __m
7210b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
7220b57cec5SDimitry Andric /// \param __count
7230b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
7240b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
7250b57cec5SDimitry Andric ///    values. If \a __count is greater or equal to 16, the result is set to all
7260b57cec5SDimitry Andric ///    0.
7270b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
7280b57cec5SDimitry Andric _mm_sll_pi16(__m64 __m, __m64 __count)
7290b57cec5SDimitry Andric {
7300b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
7310b57cec5SDimitry Andric }
7320b57cec5SDimitry Andric 
7330b57cec5SDimitry Andric /// Left-shifts each 16-bit signed integer element of a 64-bit integer
7340b57cec5SDimitry Andric ///    vector of [4 x i16] by the number of bits specified by a 32-bit integer.
7350b57cec5SDimitry Andric ///    The lower 16 bits of the results are packed into a 64-bit integer vector
7360b57cec5SDimitry Andric ///    of [4 x i16].
7370b57cec5SDimitry Andric ///
7380b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
7390b57cec5SDimitry Andric ///
7400b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
7410b57cec5SDimitry Andric ///
7420b57cec5SDimitry Andric /// \param __m
7430b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
7440b57cec5SDimitry Andric /// \param __count
7450b57cec5SDimitry Andric ///    A 32-bit integer value.
7460b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
7470b57cec5SDimitry Andric ///    values. If \a __count is greater or equal to 16, the result is set to all
7480b57cec5SDimitry Andric ///    0.
7490b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
7500b57cec5SDimitry Andric _mm_slli_pi16(__m64 __m, int __count)
7510b57cec5SDimitry Andric {
7520b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
7530b57cec5SDimitry Andric }
7540b57cec5SDimitry Andric 
7550b57cec5SDimitry Andric /// Left-shifts each 32-bit signed integer element of the first
7560b57cec5SDimitry Andric ///    parameter, which is a 64-bit integer vector of [2 x i32], by the number
7570b57cec5SDimitry Andric ///    of bits specified by the second parameter, which is a 64-bit integer. The
7580b57cec5SDimitry Andric ///    lower 32 bits of the results are packed into a 64-bit integer vector of
7590b57cec5SDimitry Andric ///    [2 x i32].
7600b57cec5SDimitry Andric ///
7610b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
7620b57cec5SDimitry Andric ///
7630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
7640b57cec5SDimitry Andric ///
7650b57cec5SDimitry Andric /// \param __m
7660b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
7670b57cec5SDimitry Andric /// \param __count
7680b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
7690b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
7700b57cec5SDimitry Andric ///    values. If \a __count is greater or equal to 32, the result is set to all
7710b57cec5SDimitry Andric ///    0.
7720b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
7730b57cec5SDimitry Andric _mm_sll_pi32(__m64 __m, __m64 __count)
7740b57cec5SDimitry Andric {
7750b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
7760b57cec5SDimitry Andric }
7770b57cec5SDimitry Andric 
7780b57cec5SDimitry Andric /// Left-shifts each 32-bit signed integer element of a 64-bit integer
7790b57cec5SDimitry Andric ///    vector of [2 x i32] by the number of bits specified by a 32-bit integer.
7800b57cec5SDimitry Andric ///    The lower 32 bits of the results are packed into a 64-bit integer vector
7810b57cec5SDimitry Andric ///    of [2 x i32].
7820b57cec5SDimitry Andric ///
7830b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
7840b57cec5SDimitry Andric ///
7850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
7860b57cec5SDimitry Andric ///
7870b57cec5SDimitry Andric /// \param __m
7880b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
7890b57cec5SDimitry Andric /// \param __count
7900b57cec5SDimitry Andric ///    A 32-bit integer value.
7910b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
7920b57cec5SDimitry Andric ///    values. If \a __count is greater or equal to 32, the result is set to all
7930b57cec5SDimitry Andric ///    0.
7940b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
7950b57cec5SDimitry Andric _mm_slli_pi32(__m64 __m, int __count)
7960b57cec5SDimitry Andric {
7970b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
7980b57cec5SDimitry Andric }
7990b57cec5SDimitry Andric 
8000b57cec5SDimitry Andric /// Left-shifts the first 64-bit integer parameter by the number of bits
8010b57cec5SDimitry Andric ///    specified by the second 64-bit integer parameter. The lower 64 bits of
8020b57cec5SDimitry Andric ///    result are returned.
8030b57cec5SDimitry Andric ///
8040b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
8050b57cec5SDimitry Andric ///
8060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
8070b57cec5SDimitry Andric ///
8080b57cec5SDimitry Andric /// \param __m
8090b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
8100b57cec5SDimitry Andric /// \param __count
8110b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
8120b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the left-shifted value. If
8130b57cec5SDimitry Andric ///     \a __count is greater or equal to 64, the result is set to 0.
8140b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
8150b57cec5SDimitry Andric _mm_sll_si64(__m64 __m, __m64 __count)
8160b57cec5SDimitry Andric {
8170b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
8180b57cec5SDimitry Andric }
8190b57cec5SDimitry Andric 
8200b57cec5SDimitry Andric /// Left-shifts the first parameter, which is a 64-bit integer, by the
8210b57cec5SDimitry Andric ///    number of bits specified by the second parameter, which is a 32-bit
8220b57cec5SDimitry Andric ///    integer. The lower 64 bits of result are returned.
8230b57cec5SDimitry Andric ///
8240b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
8250b57cec5SDimitry Andric ///
8260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
8270b57cec5SDimitry Andric ///
8280b57cec5SDimitry Andric /// \param __m
8290b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
8300b57cec5SDimitry Andric /// \param __count
8310b57cec5SDimitry Andric ///    A 32-bit integer value.
8320b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the left-shifted value. If
8330b57cec5SDimitry Andric ///     \a __count is greater or equal to 64, the result is set to 0.
8340b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
8350b57cec5SDimitry Andric _mm_slli_si64(__m64 __m, int __count)
8360b57cec5SDimitry Andric {
8370b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
8380b57cec5SDimitry Andric }
8390b57cec5SDimitry Andric 
8400b57cec5SDimitry Andric /// Right-shifts each 16-bit integer element of the first parameter,
8410b57cec5SDimitry Andric ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
8420b57cec5SDimitry Andric ///    specified by the second parameter, which is a 64-bit integer.
8430b57cec5SDimitry Andric ///
8440b57cec5SDimitry Andric ///    High-order bits are filled with the sign bit of the initial value of each
8450b57cec5SDimitry Andric ///    16-bit element. The 16-bit results are packed into a 64-bit integer
8460b57cec5SDimitry Andric ///    vector of [4 x i16].
8470b57cec5SDimitry Andric ///
8480b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
8490b57cec5SDimitry Andric ///
8500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
8510b57cec5SDimitry Andric ///
8520b57cec5SDimitry Andric /// \param __m
8530b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
8540b57cec5SDimitry Andric /// \param __count
8550b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
8560b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
8570b57cec5SDimitry Andric ///    values.
8580b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
8590b57cec5SDimitry Andric _mm_sra_pi16(__m64 __m, __m64 __count)
8600b57cec5SDimitry Andric {
8610b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
8620b57cec5SDimitry Andric }
8630b57cec5SDimitry Andric 
8640b57cec5SDimitry Andric /// Right-shifts each 16-bit integer element of a 64-bit integer vector
8650b57cec5SDimitry Andric ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
8660b57cec5SDimitry Andric ///
8670b57cec5SDimitry Andric ///    High-order bits are filled with the sign bit of the initial value of each
8680b57cec5SDimitry Andric ///    16-bit element. The 16-bit results are packed into a 64-bit integer
8690b57cec5SDimitry Andric ///    vector of [4 x i16].
8700b57cec5SDimitry Andric ///
8710b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
8720b57cec5SDimitry Andric ///
8730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
8740b57cec5SDimitry Andric ///
8750b57cec5SDimitry Andric /// \param __m
8760b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
8770b57cec5SDimitry Andric /// \param __count
8780b57cec5SDimitry Andric ///    A 32-bit integer value.
8790b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
8800b57cec5SDimitry Andric ///    values.
8810b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
8820b57cec5SDimitry Andric _mm_srai_pi16(__m64 __m, int __count)
8830b57cec5SDimitry Andric {
8840b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
8850b57cec5SDimitry Andric }
8860b57cec5SDimitry Andric 
8870b57cec5SDimitry Andric /// Right-shifts each 32-bit integer element of the first parameter,
8880b57cec5SDimitry Andric ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
8890b57cec5SDimitry Andric ///    specified by the second parameter, which is a 64-bit integer.
8900b57cec5SDimitry Andric ///
8910b57cec5SDimitry Andric ///    High-order bits are filled with the sign bit of the initial value of each
8920b57cec5SDimitry Andric ///    32-bit element. The 32-bit results are packed into a 64-bit integer
8930b57cec5SDimitry Andric ///    vector of [2 x i32].
8940b57cec5SDimitry Andric ///
8950b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
8960b57cec5SDimitry Andric ///
8970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
8980b57cec5SDimitry Andric ///
8990b57cec5SDimitry Andric /// \param __m
9000b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
9010b57cec5SDimitry Andric /// \param __count
9020b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
9030b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
9040b57cec5SDimitry Andric ///    values.
9050b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
9060b57cec5SDimitry Andric _mm_sra_pi32(__m64 __m, __m64 __count)
9070b57cec5SDimitry Andric {
9080b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
9090b57cec5SDimitry Andric }
9100b57cec5SDimitry Andric 
9110b57cec5SDimitry Andric /// Right-shifts each 32-bit integer element of a 64-bit integer vector
9120b57cec5SDimitry Andric ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
9130b57cec5SDimitry Andric ///
9140b57cec5SDimitry Andric ///    High-order bits are filled with the sign bit of the initial value of each
9150b57cec5SDimitry Andric ///    32-bit element. The 32-bit results are packed into a 64-bit integer
9160b57cec5SDimitry Andric ///    vector of [2 x i32].
9170b57cec5SDimitry Andric ///
9180b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
9190b57cec5SDimitry Andric ///
9200b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
9210b57cec5SDimitry Andric ///
9220b57cec5SDimitry Andric /// \param __m
9230b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
9240b57cec5SDimitry Andric /// \param __count
9250b57cec5SDimitry Andric ///    A 32-bit integer value.
9260b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
9270b57cec5SDimitry Andric ///    values.
9280b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
9290b57cec5SDimitry Andric _mm_srai_pi32(__m64 __m, int __count)
9300b57cec5SDimitry Andric {
9310b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
9320b57cec5SDimitry Andric }
9330b57cec5SDimitry Andric 
9340b57cec5SDimitry Andric /// Right-shifts each 16-bit integer element of the first parameter,
9350b57cec5SDimitry Andric ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
9360b57cec5SDimitry Andric ///    specified by the second parameter, which is a 64-bit integer.
9370b57cec5SDimitry Andric ///
9380b57cec5SDimitry Andric ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
9390b57cec5SDimitry Andric ///    integer vector of [4 x i16].
9400b57cec5SDimitry Andric ///
9410b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
9420b57cec5SDimitry Andric ///
9430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
9440b57cec5SDimitry Andric ///
9450b57cec5SDimitry Andric /// \param __m
9460b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
9470b57cec5SDimitry Andric /// \param __count
9480b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
9490b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
9500b57cec5SDimitry Andric ///    values.
9510b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
9520b57cec5SDimitry Andric _mm_srl_pi16(__m64 __m, __m64 __count)
9530b57cec5SDimitry Andric {
9540b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
9550b57cec5SDimitry Andric }
9560b57cec5SDimitry Andric 
9570b57cec5SDimitry Andric /// Right-shifts each 16-bit integer element of a 64-bit integer vector
9580b57cec5SDimitry Andric ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
9590b57cec5SDimitry Andric ///
9600b57cec5SDimitry Andric ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
9610b57cec5SDimitry Andric ///    integer vector of [4 x i16].
9620b57cec5SDimitry Andric ///
9630b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
9640b57cec5SDimitry Andric ///
9650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
9660b57cec5SDimitry Andric ///
9670b57cec5SDimitry Andric /// \param __m
9680b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
9690b57cec5SDimitry Andric /// \param __count
9700b57cec5SDimitry Andric ///    A 32-bit integer value.
9710b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
9720b57cec5SDimitry Andric ///    values.
9730b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
9740b57cec5SDimitry Andric _mm_srli_pi16(__m64 __m, int __count)
9750b57cec5SDimitry Andric {
9760b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
9770b57cec5SDimitry Andric }
9780b57cec5SDimitry Andric 
9790b57cec5SDimitry Andric /// Right-shifts each 32-bit integer element of the first parameter,
9800b57cec5SDimitry Andric ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
9810b57cec5SDimitry Andric ///    specified by the second parameter, which is a 64-bit integer.
9820b57cec5SDimitry Andric ///
9830b57cec5SDimitry Andric ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
9840b57cec5SDimitry Andric ///    integer vector of [2 x i32].
9850b57cec5SDimitry Andric ///
9860b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
9870b57cec5SDimitry Andric ///
9880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
9890b57cec5SDimitry Andric ///
9900b57cec5SDimitry Andric /// \param __m
9910b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
9920b57cec5SDimitry Andric /// \param __count
9930b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
9940b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
9950b57cec5SDimitry Andric ///    values.
9960b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
9970b57cec5SDimitry Andric _mm_srl_pi32(__m64 __m, __m64 __count)
9980b57cec5SDimitry Andric {
9990b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
10000b57cec5SDimitry Andric }
10010b57cec5SDimitry Andric 
10020b57cec5SDimitry Andric /// Right-shifts each 32-bit integer element of a 64-bit integer vector
10030b57cec5SDimitry Andric ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
10040b57cec5SDimitry Andric ///
10050b57cec5SDimitry Andric ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
10060b57cec5SDimitry Andric ///    integer vector of [2 x i32].
10070b57cec5SDimitry Andric ///
10080b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
10090b57cec5SDimitry Andric ///
10100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
10110b57cec5SDimitry Andric ///
10120b57cec5SDimitry Andric /// \param __m
10130b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
10140b57cec5SDimitry Andric /// \param __count
10150b57cec5SDimitry Andric ///    A 32-bit integer value.
10160b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
10170b57cec5SDimitry Andric ///    values.
10180b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
10190b57cec5SDimitry Andric _mm_srli_pi32(__m64 __m, int __count)
10200b57cec5SDimitry Andric {
10210b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
10220b57cec5SDimitry Andric }
10230b57cec5SDimitry Andric 
10240b57cec5SDimitry Andric /// Right-shifts the first 64-bit integer parameter by the number of bits
10250b57cec5SDimitry Andric ///    specified by the second 64-bit integer parameter.
10260b57cec5SDimitry Andric ///
10270b57cec5SDimitry Andric ///    High-order bits are cleared.
10280b57cec5SDimitry Andric ///
10290b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
10300b57cec5SDimitry Andric ///
10310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
10320b57cec5SDimitry Andric ///
10330b57cec5SDimitry Andric /// \param __m
10340b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
10350b57cec5SDimitry Andric /// \param __count
10360b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
10370b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the right-shifted value.
10380b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
10390b57cec5SDimitry Andric _mm_srl_si64(__m64 __m, __m64 __count)
10400b57cec5SDimitry Andric {
10410b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
10420b57cec5SDimitry Andric }
10430b57cec5SDimitry Andric 
10440b57cec5SDimitry Andric /// Right-shifts the first parameter, which is a 64-bit integer, by the
10450b57cec5SDimitry Andric ///    number of bits specified by the second parameter, which is a 32-bit
10460b57cec5SDimitry Andric ///    integer.
10470b57cec5SDimitry Andric ///
10480b57cec5SDimitry Andric ///    High-order bits are cleared.
10490b57cec5SDimitry Andric ///
10500b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
10510b57cec5SDimitry Andric ///
10520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
10530b57cec5SDimitry Andric ///
10540b57cec5SDimitry Andric /// \param __m
10550b57cec5SDimitry Andric ///    A 64-bit integer vector interpreted as a single 64-bit integer.
10560b57cec5SDimitry Andric /// \param __count
10570b57cec5SDimitry Andric ///    A 32-bit integer value.
10580b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the right-shifted value.
10590b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
10600b57cec5SDimitry Andric _mm_srli_si64(__m64 __m, int __count)
10610b57cec5SDimitry Andric {
10620b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
10630b57cec5SDimitry Andric }
10640b57cec5SDimitry Andric 
10650b57cec5SDimitry Andric /// Performs a bitwise AND of two 64-bit integer vectors.
10660b57cec5SDimitry Andric ///
10670b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
10680b57cec5SDimitry Andric ///
10690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PAND </c> instruction.
10700b57cec5SDimitry Andric ///
10710b57cec5SDimitry Andric /// \param __m1
10720b57cec5SDimitry Andric ///    A 64-bit integer vector.
10730b57cec5SDimitry Andric /// \param __m2
10740b57cec5SDimitry Andric ///    A 64-bit integer vector.
10750b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the bitwise AND of both
10760b57cec5SDimitry Andric ///    parameters.
10770b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
10780b57cec5SDimitry Andric _mm_and_si64(__m64 __m1, __m64 __m2)
10790b57cec5SDimitry Andric {
10800b57cec5SDimitry Andric     return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
10810b57cec5SDimitry Andric }
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric /// Performs a bitwise NOT of the first 64-bit integer vector, and then
10840b57cec5SDimitry Andric ///    performs a bitwise AND of the intermediate result and the second 64-bit
10850b57cec5SDimitry Andric ///    integer vector.
10860b57cec5SDimitry Andric ///
10870b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
10880b57cec5SDimitry Andric ///
10890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PANDN </c> instruction.
10900b57cec5SDimitry Andric ///
10910b57cec5SDimitry Andric /// \param __m1
10920b57cec5SDimitry Andric ///    A 64-bit integer vector. The one's complement of this parameter is used
10930b57cec5SDimitry Andric ///    in the bitwise AND.
10940b57cec5SDimitry Andric /// \param __m2
10950b57cec5SDimitry Andric ///    A 64-bit integer vector.
10960b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the bitwise AND of the second
10970b57cec5SDimitry Andric ///    parameter and the one's complement of the first parameter.
10980b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
10990b57cec5SDimitry Andric _mm_andnot_si64(__m64 __m1, __m64 __m2)
11000b57cec5SDimitry Andric {
11010b57cec5SDimitry Andric     return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
11020b57cec5SDimitry Andric }
11030b57cec5SDimitry Andric 
11040b57cec5SDimitry Andric /// Performs a bitwise OR of two 64-bit integer vectors.
11050b57cec5SDimitry Andric ///
11060b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
11070b57cec5SDimitry Andric ///
11080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> POR </c> instruction.
11090b57cec5SDimitry Andric ///
11100b57cec5SDimitry Andric /// \param __m1
11110b57cec5SDimitry Andric ///    A 64-bit integer vector.
11120b57cec5SDimitry Andric /// \param __m2
11130b57cec5SDimitry Andric ///    A 64-bit integer vector.
11140b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the bitwise OR of both
11150b57cec5SDimitry Andric ///    parameters.
11160b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
11170b57cec5SDimitry Andric _mm_or_si64(__m64 __m1, __m64 __m2)
11180b57cec5SDimitry Andric {
11190b57cec5SDimitry Andric     return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
11200b57cec5SDimitry Andric }
11210b57cec5SDimitry Andric 
11220b57cec5SDimitry Andric /// Performs a bitwise exclusive OR of two 64-bit integer vectors.
11230b57cec5SDimitry Andric ///
11240b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
11250b57cec5SDimitry Andric ///
11260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PXOR </c> instruction.
11270b57cec5SDimitry Andric ///
11280b57cec5SDimitry Andric /// \param __m1
11290b57cec5SDimitry Andric ///    A 64-bit integer vector.
11300b57cec5SDimitry Andric /// \param __m2
11310b57cec5SDimitry Andric ///    A 64-bit integer vector.
11320b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
11330b57cec5SDimitry Andric ///    parameters.
11340b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
11350b57cec5SDimitry Andric _mm_xor_si64(__m64 __m1, __m64 __m2)
11360b57cec5SDimitry Andric {
11370b57cec5SDimitry Andric     return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
11380b57cec5SDimitry Andric }
11390b57cec5SDimitry Andric 
11400b57cec5SDimitry Andric /// Compares the 8-bit integer elements of two 64-bit integer vectors of
11410b57cec5SDimitry Andric ///    [8 x i8] to determine if the element of the first vector is equal to the
11420b57cec5SDimitry Andric ///    corresponding element of the second vector.
11430b57cec5SDimitry Andric ///
1144*0fca6ea1SDimitry Andric ///    Each comparison returns 0 for false, 0xFF for true.
11450b57cec5SDimitry Andric ///
11460b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
11470b57cec5SDimitry Andric ///
11480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
11490b57cec5SDimitry Andric ///
11500b57cec5SDimitry Andric /// \param __m1
11510b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
11520b57cec5SDimitry Andric /// \param __m2
11530b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
11540b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
11550b57cec5SDimitry Andric ///    results.
11560b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
11570b57cec5SDimitry Andric _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
11580b57cec5SDimitry Andric {
11590b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
11600b57cec5SDimitry Andric }
11610b57cec5SDimitry Andric 
11620b57cec5SDimitry Andric /// Compares the 16-bit integer elements of two 64-bit integer vectors of
11630b57cec5SDimitry Andric ///    [4 x i16] to determine if the element of the first vector is equal to the
11640b57cec5SDimitry Andric ///    corresponding element of the second vector.
11650b57cec5SDimitry Andric ///
1166*0fca6ea1SDimitry Andric ///    Each comparison returns 0 for false, 0xFFFF for true.
11670b57cec5SDimitry Andric ///
11680b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
11690b57cec5SDimitry Andric ///
11700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
11710b57cec5SDimitry Andric ///
11720b57cec5SDimitry Andric /// \param __m1
11730b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
11740b57cec5SDimitry Andric /// \param __m2
11750b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
11760b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
11770b57cec5SDimitry Andric ///    results.
11780b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
11790b57cec5SDimitry Andric _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
11800b57cec5SDimitry Andric {
11810b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
11820b57cec5SDimitry Andric }
11830b57cec5SDimitry Andric 
11840b57cec5SDimitry Andric /// Compares the 32-bit integer elements of two 64-bit integer vectors of
11850b57cec5SDimitry Andric ///    [2 x i32] to determine if the element of the first vector is equal to the
11860b57cec5SDimitry Andric ///    corresponding element of the second vector.
11870b57cec5SDimitry Andric ///
1188*0fca6ea1SDimitry Andric ///    Each comparison returns 0 for false, 0xFFFFFFFF for true.
11890b57cec5SDimitry Andric ///
11900b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
11910b57cec5SDimitry Andric ///
11920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
11930b57cec5SDimitry Andric ///
11940b57cec5SDimitry Andric /// \param __m1
11950b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
11960b57cec5SDimitry Andric /// \param __m2
11970b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
11980b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
11990b57cec5SDimitry Andric ///    results.
12000b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
12010b57cec5SDimitry Andric _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
12020b57cec5SDimitry Andric {
12030b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
12040b57cec5SDimitry Andric }
12050b57cec5SDimitry Andric 
12060b57cec5SDimitry Andric /// Compares the 8-bit integer elements of two 64-bit integer vectors of
12070b57cec5SDimitry Andric ///    [8 x i8] to determine if the element of the first vector is greater than
12080b57cec5SDimitry Andric ///    the corresponding element of the second vector.
12090b57cec5SDimitry Andric ///
1210*0fca6ea1SDimitry Andric ///    Each comparison returns 0 for false, 0xFF for true.
12110b57cec5SDimitry Andric ///
12120b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
12130b57cec5SDimitry Andric ///
12140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
12150b57cec5SDimitry Andric ///
12160b57cec5SDimitry Andric /// \param __m1
12170b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
12180b57cec5SDimitry Andric /// \param __m2
12190b57cec5SDimitry Andric ///    A 64-bit integer vector of [8 x i8].
12200b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
12210b57cec5SDimitry Andric ///    results.
12220b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
12230b57cec5SDimitry Andric _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
12240b57cec5SDimitry Andric {
12250b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
12260b57cec5SDimitry Andric }
12270b57cec5SDimitry Andric 
12280b57cec5SDimitry Andric /// Compares the 16-bit integer elements of two 64-bit integer vectors of
12290b57cec5SDimitry Andric ///    [4 x i16] to determine if the element of the first vector is greater than
12300b57cec5SDimitry Andric ///    the corresponding element of the second vector.
12310b57cec5SDimitry Andric ///
1232*0fca6ea1SDimitry Andric ///    Each comparison returns 0 for false, 0xFFFF for true.
12330b57cec5SDimitry Andric ///
12340b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
12350b57cec5SDimitry Andric ///
12360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
12370b57cec5SDimitry Andric ///
12380b57cec5SDimitry Andric /// \param __m1
12390b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
12400b57cec5SDimitry Andric /// \param __m2
12410b57cec5SDimitry Andric ///    A 64-bit integer vector of [4 x i16].
12420b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
12430b57cec5SDimitry Andric ///    results.
12440b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
12450b57cec5SDimitry Andric _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
12460b57cec5SDimitry Andric {
12470b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
12480b57cec5SDimitry Andric }
12490b57cec5SDimitry Andric 
12500b57cec5SDimitry Andric /// Compares the 32-bit integer elements of two 64-bit integer vectors of
12510b57cec5SDimitry Andric ///    [2 x i32] to determine if the element of the first vector is greater than
12520b57cec5SDimitry Andric ///    the corresponding element of the second vector.
12530b57cec5SDimitry Andric ///
1254*0fca6ea1SDimitry Andric ///    Each comparison returns 0 for false, 0xFFFFFFFF for true.
12550b57cec5SDimitry Andric ///
12560b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
12570b57cec5SDimitry Andric ///
12580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
12590b57cec5SDimitry Andric ///
12600b57cec5SDimitry Andric /// \param __m1
12610b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
12620b57cec5SDimitry Andric /// \param __m2
12630b57cec5SDimitry Andric ///    A 64-bit integer vector of [2 x i32].
12640b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
12650b57cec5SDimitry Andric ///    results.
12660b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
12670b57cec5SDimitry Andric _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
12680b57cec5SDimitry Andric {
12690b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
12700b57cec5SDimitry Andric }
12710b57cec5SDimitry Andric 
12720b57cec5SDimitry Andric /// Constructs a 64-bit integer vector initialized to zero.
12730b57cec5SDimitry Andric ///
12740b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
12750b57cec5SDimitry Andric ///
12760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PXOR </c> instruction.
12770b57cec5SDimitry Andric ///
12780b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector with all elements set to zero.
12790b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
12800b57cec5SDimitry Andric _mm_setzero_si64(void)
12810b57cec5SDimitry Andric {
12820b57cec5SDimitry Andric     return __extension__ (__m64){ 0LL };
12830b57cec5SDimitry Andric }
12840b57cec5SDimitry Andric 
12850b57cec5SDimitry Andric /// Constructs a 64-bit integer vector initialized with the specified
12860b57cec5SDimitry Andric ///    32-bit integer values.
12870b57cec5SDimitry Andric ///
12880b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
12890b57cec5SDimitry Andric ///
12900b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
12910b57cec5SDimitry Andric ///    instruction.
12920b57cec5SDimitry Andric ///
12930b57cec5SDimitry Andric /// \param __i1
12940b57cec5SDimitry Andric ///    A 32-bit integer value used to initialize the upper 32 bits of the
12950b57cec5SDimitry Andric ///    result.
12960b57cec5SDimitry Andric /// \param __i0
12970b57cec5SDimitry Andric ///    A 32-bit integer value used to initialize the lower 32 bits of the
12980b57cec5SDimitry Andric ///    result.
12990b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector.
13000b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
13010b57cec5SDimitry Andric _mm_set_pi32(int __i1, int __i0)
13020b57cec5SDimitry Andric {
13030b57cec5SDimitry Andric     return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
13040b57cec5SDimitry Andric }
13050b57cec5SDimitry Andric 
13060b57cec5SDimitry Andric /// Constructs a 64-bit integer vector initialized with the specified
13070b57cec5SDimitry Andric ///    16-bit integer values.
13080b57cec5SDimitry Andric ///
13090b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
13100b57cec5SDimitry Andric ///
13110b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
13120b57cec5SDimitry Andric ///    instruction.
13130b57cec5SDimitry Andric ///
13140b57cec5SDimitry Andric /// \param __s3
13150b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [63:48] of the result.
13160b57cec5SDimitry Andric /// \param __s2
13170b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [47:32] of the result.
13180b57cec5SDimitry Andric /// \param __s1
13190b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [31:16] of the result.
13200b57cec5SDimitry Andric /// \param __s0
13210b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [15:0] of the result.
13220b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector.
13230b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
13240b57cec5SDimitry Andric _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
13250b57cec5SDimitry Andric {
13260b57cec5SDimitry Andric     return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
13270b57cec5SDimitry Andric }
13280b57cec5SDimitry Andric 
13290b57cec5SDimitry Andric /// Constructs a 64-bit integer vector initialized with the specified
13300b57cec5SDimitry Andric ///    8-bit integer values.
13310b57cec5SDimitry Andric ///
13320b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
13330b57cec5SDimitry Andric ///
13340b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
13350b57cec5SDimitry Andric ///    instruction.
13360b57cec5SDimitry Andric ///
13370b57cec5SDimitry Andric /// \param __b7
13380b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [63:56] of the result.
13390b57cec5SDimitry Andric /// \param __b6
13400b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [55:48] of the result.
13410b57cec5SDimitry Andric /// \param __b5
13420b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [47:40] of the result.
13430b57cec5SDimitry Andric /// \param __b4
13440b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [39:32] of the result.
13450b57cec5SDimitry Andric /// \param __b3
13460b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [31:24] of the result.
13470b57cec5SDimitry Andric /// \param __b2
13480b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [23:16] of the result.
13490b57cec5SDimitry Andric /// \param __b1
13500b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [15:8] of the result.
13510b57cec5SDimitry Andric /// \param __b0
13520b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [7:0] of the result.
13530b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector.
13540b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
13550b57cec5SDimitry Andric _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
13560b57cec5SDimitry Andric             char __b1, char __b0)
13570b57cec5SDimitry Andric {
13580b57cec5SDimitry Andric     return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
13590b57cec5SDimitry Andric                                                __b4, __b5, __b6, __b7);
13600b57cec5SDimitry Andric }
13610b57cec5SDimitry Andric 
13620b57cec5SDimitry Andric /// Constructs a 64-bit integer vector of [2 x i32], with each of the
13630b57cec5SDimitry Andric ///    32-bit integer vector elements set to the specified 32-bit integer
13640b57cec5SDimitry Andric ///    value.
13650b57cec5SDimitry Andric ///
13660b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
13670b57cec5SDimitry Andric ///
13680b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
13690b57cec5SDimitry Andric ///    instruction.
13700b57cec5SDimitry Andric ///
13710b57cec5SDimitry Andric /// \param __i
13720b57cec5SDimitry Andric ///    A 32-bit integer value used to initialize each vector element of the
13730b57cec5SDimitry Andric ///    result.
13740b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector of [2 x i32].
13750b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
13760b57cec5SDimitry Andric _mm_set1_pi32(int __i)
13770b57cec5SDimitry Andric {
13780b57cec5SDimitry Andric     return _mm_set_pi32(__i, __i);
13790b57cec5SDimitry Andric }
13800b57cec5SDimitry Andric 
13810b57cec5SDimitry Andric /// Constructs a 64-bit integer vector of [4 x i16], with each of the
13820b57cec5SDimitry Andric ///    16-bit integer vector elements set to the specified 16-bit integer
13830b57cec5SDimitry Andric ///    value.
13840b57cec5SDimitry Andric ///
13850b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
13860b57cec5SDimitry Andric ///
13870b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
13880b57cec5SDimitry Andric ///    instruction.
13890b57cec5SDimitry Andric ///
13900b57cec5SDimitry Andric /// \param __w
13910b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize each vector element of the
13920b57cec5SDimitry Andric ///    result.
13930b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector of [4 x i16].
13940b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
13950b57cec5SDimitry Andric _mm_set1_pi16(short __w)
13960b57cec5SDimitry Andric {
13970b57cec5SDimitry Andric     return _mm_set_pi16(__w, __w, __w, __w);
13980b57cec5SDimitry Andric }
13990b57cec5SDimitry Andric 
14000b57cec5SDimitry Andric /// Constructs a 64-bit integer vector of [8 x i8], with each of the
14010b57cec5SDimitry Andric ///    8-bit integer vector elements set to the specified 8-bit integer value.
14020b57cec5SDimitry Andric ///
14030b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
14040b57cec5SDimitry Andric ///
14050b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
14060b57cec5SDimitry Andric ///    instruction.
14070b57cec5SDimitry Andric ///
14080b57cec5SDimitry Andric /// \param __b
14090b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize each vector element of the
14100b57cec5SDimitry Andric ///    result.
14110b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector of [8 x i8].
14120b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
14130b57cec5SDimitry Andric _mm_set1_pi8(char __b)
14140b57cec5SDimitry Andric {
14150b57cec5SDimitry Andric     return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
14160b57cec5SDimitry Andric }
14170b57cec5SDimitry Andric 
14180b57cec5SDimitry Andric /// Constructs a 64-bit integer vector, initialized in reverse order with
14190b57cec5SDimitry Andric ///    the specified 32-bit integer values.
14200b57cec5SDimitry Andric ///
14210b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
14220b57cec5SDimitry Andric ///
14230b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
14240b57cec5SDimitry Andric ///    instruction.
14250b57cec5SDimitry Andric ///
14260b57cec5SDimitry Andric /// \param __i0
14270b57cec5SDimitry Andric ///    A 32-bit integer value used to initialize the lower 32 bits of the
14280b57cec5SDimitry Andric ///    result.
14290b57cec5SDimitry Andric /// \param __i1
14300b57cec5SDimitry Andric ///    A 32-bit integer value used to initialize the upper 32 bits of the
14310b57cec5SDimitry Andric ///    result.
14320b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector.
14330b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
14340b57cec5SDimitry Andric _mm_setr_pi32(int __i0, int __i1)
14350b57cec5SDimitry Andric {
14360b57cec5SDimitry Andric     return _mm_set_pi32(__i1, __i0);
14370b57cec5SDimitry Andric }
14380b57cec5SDimitry Andric 
14390b57cec5SDimitry Andric /// Constructs a 64-bit integer vector, initialized in reverse order with
14400b57cec5SDimitry Andric ///    the specified 16-bit integer values.
14410b57cec5SDimitry Andric ///
14420b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
14430b57cec5SDimitry Andric ///
14440b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
14450b57cec5SDimitry Andric ///    instruction.
14460b57cec5SDimitry Andric ///
14470b57cec5SDimitry Andric /// \param __w0
14480b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [15:0] of the result.
14490b57cec5SDimitry Andric /// \param __w1
14500b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [31:16] of the result.
14510b57cec5SDimitry Andric /// \param __w2
14520b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [47:32] of the result.
14530b57cec5SDimitry Andric /// \param __w3
14540b57cec5SDimitry Andric ///    A 16-bit integer value used to initialize bits [63:48] of the result.
14550b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector.
14560b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
14570b57cec5SDimitry Andric _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
14580b57cec5SDimitry Andric {
14590b57cec5SDimitry Andric     return _mm_set_pi16(__w3, __w2, __w1, __w0);
14600b57cec5SDimitry Andric }
14610b57cec5SDimitry Andric 
14620b57cec5SDimitry Andric /// Constructs a 64-bit integer vector, initialized in reverse order with
14630b57cec5SDimitry Andric ///    the specified 8-bit integer values.
14640b57cec5SDimitry Andric ///
14650b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
14660b57cec5SDimitry Andric ///
14670b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific
14680b57cec5SDimitry Andric ///    instruction.
14690b57cec5SDimitry Andric ///
14700b57cec5SDimitry Andric /// \param __b0
14710b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [7:0] of the result.
14720b57cec5SDimitry Andric /// \param __b1
14730b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [15:8] of the result.
14740b57cec5SDimitry Andric /// \param __b2
14750b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [23:16] of the result.
14760b57cec5SDimitry Andric /// \param __b3
14770b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [31:24] of the result.
14780b57cec5SDimitry Andric /// \param __b4
14790b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [39:32] of the result.
14800b57cec5SDimitry Andric /// \param __b5
14810b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [47:40] of the result.
14820b57cec5SDimitry Andric /// \param __b6
14830b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [55:48] of the result.
14840b57cec5SDimitry Andric /// \param __b7
14850b57cec5SDimitry Andric ///    An 8-bit integer value used to initialize bits [63:56] of the result.
14860b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector.
14870b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS
14880b57cec5SDimitry Andric _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
14890b57cec5SDimitry Andric              char __b6, char __b7)
14900b57cec5SDimitry Andric {
14910b57cec5SDimitry Andric     return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
14920b57cec5SDimitry Andric }
14930b57cec5SDimitry Andric 
/* The function-attribute helper is private to this header. */
#undef __DEFAULT_FN_ATTRS

/*
 * Compatibility aliases: each short _m_* name below is an object-like macro
 * that expands to the corresponding _mm_* intrinsic name.
 */

/* State management and conversions. */
#define _m_empty       _mm_empty
#define _m_from_int    _mm_cvtsi32_si64
#define _m_from_int64  _mm_cvtsi64_m64
#define _m_to_int      _mm_cvtsi64_si32
#define _m_to_int64    _mm_cvtm64_si64

/* Pack. */
#define _m_packsswb    _mm_packs_pi16
#define _m_packssdw    _mm_packs_pi32
#define _m_packuswb    _mm_packs_pu16

/* Unpack. */
#define _m_punpckhbw   _mm_unpackhi_pi8
#define _m_punpckhwd   _mm_unpackhi_pi16
#define _m_punpckhdq   _mm_unpackhi_pi32
#define _m_punpcklbw   _mm_unpacklo_pi8
#define _m_punpcklwd   _mm_unpacklo_pi16
#define _m_punpckldq   _mm_unpacklo_pi32

/* Add. */
#define _m_paddb       _mm_add_pi8
#define _m_paddw       _mm_add_pi16
#define _m_paddd       _mm_add_pi32
#define _m_paddsb      _mm_adds_pi8
#define _m_paddsw      _mm_adds_pi16
#define _m_paddusb     _mm_adds_pu8
#define _m_paddusw     _mm_adds_pu16

/* Subtract. */
#define _m_psubb       _mm_sub_pi8
#define _m_psubw       _mm_sub_pi16
#define _m_psubd       _mm_sub_pi32
#define _m_psubsb      _mm_subs_pi8
#define _m_psubsw      _mm_subs_pi16
#define _m_psubusb     _mm_subs_pu8
#define _m_psubusw     _mm_subs_pu16

/* Multiply. */
#define _m_pmaddwd     _mm_madd_pi16
#define _m_pmulhw      _mm_mulhi_pi16
#define _m_pmullw      _mm_mullo_pi16

/* Shifts. */
#define _m_psllw       _mm_sll_pi16
#define _m_psllwi      _mm_slli_pi16
#define _m_pslld       _mm_sll_pi32
#define _m_pslldi      _mm_slli_pi32
#define _m_psllq       _mm_sll_si64
#define _m_psllqi      _mm_slli_si64
#define _m_psraw       _mm_sra_pi16
#define _m_psrawi      _mm_srai_pi16
#define _m_psrad       _mm_sra_pi32
#define _m_psradi      _mm_srai_pi32
#define _m_psrlw       _mm_srl_pi16
#define _m_psrlwi      _mm_srli_pi16
#define _m_psrld       _mm_srl_pi32
#define _m_psrldi      _mm_srli_pi32
#define _m_psrlq       _mm_srl_si64
#define _m_psrlqi      _mm_srli_si64

/* Bitwise logic. */
#define _m_pand        _mm_and_si64
#define _m_pandn       _mm_andnot_si64
#define _m_por         _mm_or_si64
#define _m_pxor        _mm_xor_si64

/* Comparisons. */
#define _m_pcmpeqb     _mm_cmpeq_pi8
#define _m_pcmpeqw     _mm_cmpeq_pi16
#define _m_pcmpeqd     _mm_cmpeq_pi32
#define _m_pcmpgtb     _mm_cmpgt_pi8
#define _m_pcmpgtw     _mm_cmpgt_pi16
#define _m_pcmpgtd     _mm_cmpgt_pi32
15540b57cec5SDimitry Andric 
15550b57cec5SDimitry Andric #endif /* __MMINTRIN_H */
15560b57cec5SDimitry Andric 
1557