xref: /freebsd-src/contrib/llvm-project/clang/lib/Headers/tmmintrin.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
20b57cec5SDimitry Andric  *
30b57cec5SDimitry Andric  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric  * See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric  *
70b57cec5SDimitry Andric  *===-----------------------------------------------------------------------===
80b57cec5SDimitry Andric  */
90b57cec5SDimitry Andric 
100b57cec5SDimitry Andric #ifndef __TMMINTRIN_H
110b57cec5SDimitry Andric #define __TMMINTRIN_H
120b57cec5SDimitry Andric 
13349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__)
14349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture"
15349cc55cSDimitry Andric #endif
16349cc55cSDimitry Andric 
170b57cec5SDimitry Andric #include <pmmintrin.h>
180b57cec5SDimitry Andric 
/* Attribute sets shared by the intrinsics defined in this header. */

/* Applied to the intrinsics that require only SSSE3. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__,                                            \
                 __nodebug__,                                                  \
                 __target__("ssse3,no-evex512"),                               \
                 __min_vector_width__(64)))

/* Applied to the intrinsics that additionally require MMX. */
#define __DEFAULT_FN_ATTRS_MMX                                                 \
  __attribute__((__always_inline__,                                            \
                 __nodebug__,                                                  \
                 __target__("mmx,ssse3,no-evex512"),                           \
                 __min_vector_width__(64)))
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 8-bit signed
290b57cec5SDimitry Andric ///    integers in the source operand and stores the 8-bit unsigned integer
300b57cec5SDimitry Andric ///    results in the destination.
310b57cec5SDimitry Andric ///
320b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
330b57cec5SDimitry Andric ///
340b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PABSB instruction.
350b57cec5SDimitry Andric ///
360b57cec5SDimitry Andric /// \param __a
370b57cec5SDimitry Andric ///    A 64-bit vector of [8 x i8].
380b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the absolute values of the
390b57cec5SDimitry Andric ///    elements in the operand.
400b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
410b57cec5SDimitry Andric _mm_abs_pi8(__m64 __a)
420b57cec5SDimitry Andric {
430b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
440b57cec5SDimitry Andric }
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 8-bit signed
470b57cec5SDimitry Andric ///    integers in the source operand and stores the 8-bit unsigned integer
480b57cec5SDimitry Andric ///    results in the destination.
490b57cec5SDimitry Andric ///
500b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
510b57cec5SDimitry Andric ///
520b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPABSB instruction.
530b57cec5SDimitry Andric ///
540b57cec5SDimitry Andric /// \param __a
550b57cec5SDimitry Andric ///    A 128-bit vector of [16 x i8].
560b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the absolute values of the
570b57cec5SDimitry Andric ///    elements in the operand.
580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
590b57cec5SDimitry Andric _mm_abs_epi8(__m128i __a)
600b57cec5SDimitry Andric {
6104eeddc0SDimitry Andric     return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
620b57cec5SDimitry Andric }
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 16-bit signed
650b57cec5SDimitry Andric ///    integers in the source operand and stores the 16-bit unsigned integer
660b57cec5SDimitry Andric ///    results in the destination.
670b57cec5SDimitry Andric ///
680b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
690b57cec5SDimitry Andric ///
700b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PABSW instruction.
710b57cec5SDimitry Andric ///
720b57cec5SDimitry Andric /// \param __a
730b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16].
740b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the absolute values of the
750b57cec5SDimitry Andric ///    elements in the operand.
760b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
770b57cec5SDimitry Andric _mm_abs_pi16(__m64 __a)
780b57cec5SDimitry Andric {
790b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
800b57cec5SDimitry Andric }
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 16-bit signed
830b57cec5SDimitry Andric ///    integers in the source operand and stores the 16-bit unsigned integer
840b57cec5SDimitry Andric ///    results in the destination.
850b57cec5SDimitry Andric ///
860b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
870b57cec5SDimitry Andric ///
880b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPABSW instruction.
890b57cec5SDimitry Andric ///
900b57cec5SDimitry Andric /// \param __a
910b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16].
920b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the absolute values of the
930b57cec5SDimitry Andric ///    elements in the operand.
940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
950b57cec5SDimitry Andric _mm_abs_epi16(__m128i __a)
960b57cec5SDimitry Andric {
9704eeddc0SDimitry Andric     return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 32-bit signed
1010b57cec5SDimitry Andric ///    integers in the source operand and stores the 32-bit unsigned integer
1020b57cec5SDimitry Andric ///    results in the destination.
1030b57cec5SDimitry Andric ///
1040b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1050b57cec5SDimitry Andric ///
1060b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PABSD instruction.
1070b57cec5SDimitry Andric ///
1080b57cec5SDimitry Andric /// \param __a
1090b57cec5SDimitry Andric ///    A 64-bit vector of [2 x i32].
1100b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the absolute values of the
1110b57cec5SDimitry Andric ///    elements in the operand.
1120b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1130b57cec5SDimitry Andric _mm_abs_pi32(__m64 __a)
1140b57cec5SDimitry Andric {
1150b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pabsd((__v2si)__a);
1160b57cec5SDimitry Andric }
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 32-bit signed
1190b57cec5SDimitry Andric ///    integers in the source operand and stores the 32-bit unsigned integer
1200b57cec5SDimitry Andric ///    results in the destination.
1210b57cec5SDimitry Andric ///
1220b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1230b57cec5SDimitry Andric ///
1240b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPABSD instruction.
1250b57cec5SDimitry Andric ///
1260b57cec5SDimitry Andric /// \param __a
1270b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32].
1280b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the absolute values of the
1290b57cec5SDimitry Andric ///    elements in the operand.
1300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
1310b57cec5SDimitry Andric _mm_abs_epi32(__m128i __a)
1320b57cec5SDimitry Andric {
13304eeddc0SDimitry Andric     return (__m128i)__builtin_elementwise_abs((__v4si)__a);
1340b57cec5SDimitry Andric }
1350b57cec5SDimitry Andric 
/// Joins the two 128-bit integer vector operands into a single value and
///    shifts that value right by the byte count given in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand giving the number of bytes by which the
///    concatenated value is shifted right.
/// \returns A 128-bit integer vector containing the concatenated,
///    right-shifted value.
#define _mm_alignr_epi8(a, b, n)                                               \
  ((__m128i)__builtin_ia32_palignr128(                                         \
      (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (n)))
1590b57cec5SDimitry Andric 
/// Joins the two 64-bit integer vector operands into a single value and
///    shifts that value right by the byte count given in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand giving the number of bytes by which the
///    concatenated value is shifted right.
/// \returns A 64-bit integer vector containing the concatenated,
///    right-shifted value.
#define _mm_alignr_pi8(a, b, n)                                                \
  ((__m64)__builtin_ia32_palignr(                                              \
      (__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric /// Horizontally adds the adjacent pairs of values contained in 2 packed
1830b57cec5SDimitry Andric ///    128-bit vectors of [8 x i16].
1840b57cec5SDimitry Andric ///
1850b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1860b57cec5SDimitry Andric ///
1870b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHADDW instruction.
1880b57cec5SDimitry Andric ///
1890b57cec5SDimitry Andric /// \param __a
1900b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
1910b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the lower bits of the
1920b57cec5SDimitry Andric ///    destination.
1930b57cec5SDimitry Andric /// \param __b
1940b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
1950b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the upper bits of the
1960b57cec5SDimitry Andric ///    destination.
1970b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
1980b57cec5SDimitry Andric ///    both operands.
1990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2000b57cec5SDimitry Andric _mm_hadd_epi16(__m128i __a, __m128i __b)
2010b57cec5SDimitry Andric {
2020b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
2030b57cec5SDimitry Andric }
2040b57cec5SDimitry Andric 
2050b57cec5SDimitry Andric /// Horizontally adds the adjacent pairs of values contained in 2 packed
2060b57cec5SDimitry Andric ///    128-bit vectors of [4 x i32].
2070b57cec5SDimitry Andric ///
2080b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2090b57cec5SDimitry Andric ///
2100b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHADDD instruction.
2110b57cec5SDimitry Andric ///
2120b57cec5SDimitry Andric /// \param __a
2130b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32] containing one of the source operands. The
2140b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the lower bits of the
2150b57cec5SDimitry Andric ///    destination.
2160b57cec5SDimitry Andric /// \param __b
2170b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32] containing one of the source operands. The
2180b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the upper bits of the
2190b57cec5SDimitry Andric ///    destination.
2200b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
2210b57cec5SDimitry Andric ///    both operands.
2220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2230b57cec5SDimitry Andric _mm_hadd_epi32(__m128i __a, __m128i __b)
2240b57cec5SDimitry Andric {
2250b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric 
2280b57cec5SDimitry Andric /// Horizontally adds the adjacent pairs of values contained in 2 packed
2290b57cec5SDimitry Andric ///    64-bit vectors of [4 x i16].
2300b57cec5SDimitry Andric ///
2310b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2320b57cec5SDimitry Andric ///
2330b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHADDW instruction.
2340b57cec5SDimitry Andric ///
2350b57cec5SDimitry Andric /// \param __a
2360b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
2370b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the lower bits of the
2380b57cec5SDimitry Andric ///    destination.
2390b57cec5SDimitry Andric /// \param __b
2400b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
2410b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the upper bits of the
2420b57cec5SDimitry Andric ///    destination.
2430b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
2440b57cec5SDimitry Andric ///    operands.
2450b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2460b57cec5SDimitry Andric _mm_hadd_pi16(__m64 __a, __m64 __b)
2470b57cec5SDimitry Andric {
2480b57cec5SDimitry Andric     return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
2490b57cec5SDimitry Andric }
2500b57cec5SDimitry Andric 
2510b57cec5SDimitry Andric /// Horizontally adds the adjacent pairs of values contained in 2 packed
2520b57cec5SDimitry Andric ///    64-bit vectors of [2 x i32].
2530b57cec5SDimitry Andric ///
2540b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2550b57cec5SDimitry Andric ///
2560b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHADDD instruction.
2570b57cec5SDimitry Andric ///
2580b57cec5SDimitry Andric /// \param __a
2590b57cec5SDimitry Andric ///    A 64-bit vector of [2 x i32] containing one of the source operands. The
2600b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the lower bits of the
2610b57cec5SDimitry Andric ///    destination.
2620b57cec5SDimitry Andric /// \param __b
2630b57cec5SDimitry Andric ///    A 64-bit vector of [2 x i32] containing one of the source operands. The
2640b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the upper bits of the
2650b57cec5SDimitry Andric ///    destination.
2660b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
2670b57cec5SDimitry Andric ///    operands.
2680b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2690b57cec5SDimitry Andric _mm_hadd_pi32(__m64 __a, __m64 __b)
2700b57cec5SDimitry Andric {
2710b57cec5SDimitry Andric     return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
2720b57cec5SDimitry Andric }
2730b57cec5SDimitry Andric 
274*0fca6ea1SDimitry Andric /// Horizontally adds, with saturation, the adjacent pairs of values contained
275*0fca6ea1SDimitry Andric ///    in two packed 128-bit vectors of [8 x i16].
276*0fca6ea1SDimitry Andric ///
277*0fca6ea1SDimitry Andric ///    Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
278*0fca6ea1SDimitry Andric ///    less than 0x8000 are saturated to 0x8000.
2790b57cec5SDimitry Andric ///
2800b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2810b57cec5SDimitry Andric ///
2820b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHADDSW instruction.
2830b57cec5SDimitry Andric ///
2840b57cec5SDimitry Andric /// \param __a
2850b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
2860b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the lower bits of the
2870b57cec5SDimitry Andric ///    destination.
2880b57cec5SDimitry Andric /// \param __b
2890b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
2900b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the upper bits of the
2910b57cec5SDimitry Andric ///    destination.
2920b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
2930b57cec5SDimitry Andric ///    sums of both operands.
2940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
2950b57cec5SDimitry Andric _mm_hadds_epi16(__m128i __a, __m128i __b)
2960b57cec5SDimitry Andric {
2970b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
2980b57cec5SDimitry Andric }
2990b57cec5SDimitry Andric 
300*0fca6ea1SDimitry Andric /// Horizontally adds, with saturation, the adjacent pairs of values contained
301*0fca6ea1SDimitry Andric ///    in two packed 64-bit vectors of [4 x i16].
302*0fca6ea1SDimitry Andric ///
303*0fca6ea1SDimitry Andric ///    Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
304*0fca6ea1SDimitry Andric ///    less than 0x8000 are saturated to 0x8000.
3050b57cec5SDimitry Andric ///
3060b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3070b57cec5SDimitry Andric ///
3080b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHADDSW instruction.
3090b57cec5SDimitry Andric ///
3100b57cec5SDimitry Andric /// \param __a
3110b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
3120b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the lower bits of the
3130b57cec5SDimitry Andric ///    destination.
3140b57cec5SDimitry Andric /// \param __b
3150b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
3160b57cec5SDimitry Andric ///    horizontal sums of the values are stored in the upper bits of the
3170b57cec5SDimitry Andric ///    destination.
3180b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
3190b57cec5SDimitry Andric ///    sums of both operands.
3200b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
3210b57cec5SDimitry Andric _mm_hadds_pi16(__m64 __a, __m64 __b)
3220b57cec5SDimitry Andric {
3230b57cec5SDimitry Andric     return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
3240b57cec5SDimitry Andric }
3250b57cec5SDimitry Andric 
3260b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in 2
3270b57cec5SDimitry Andric ///    packed 128-bit vectors of [8 x i16].
3280b57cec5SDimitry Andric ///
3290b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3300b57cec5SDimitry Andric ///
3310b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHSUBW instruction.
3320b57cec5SDimitry Andric ///
3330b57cec5SDimitry Andric /// \param __a
3340b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
3350b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the lower bits of
3360b57cec5SDimitry Andric ///    the destination.
3370b57cec5SDimitry Andric /// \param __b
3380b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
3390b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the upper bits of
3400b57cec5SDimitry Andric ///    the destination.
3410b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
3420b57cec5SDimitry Andric ///    of both operands.
3430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3440b57cec5SDimitry Andric _mm_hsub_epi16(__m128i __a, __m128i __b)
3450b57cec5SDimitry Andric {
3460b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
3470b57cec5SDimitry Andric }
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in 2
3500b57cec5SDimitry Andric ///    packed 128-bit vectors of [4 x i32].
3510b57cec5SDimitry Andric ///
3520b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3530b57cec5SDimitry Andric ///
3540b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHSUBD instruction.
3550b57cec5SDimitry Andric ///
3560b57cec5SDimitry Andric /// \param __a
3570b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32] containing one of the source operands. The
3580b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the lower bits of
3590b57cec5SDimitry Andric ///    the destination.
3600b57cec5SDimitry Andric /// \param __b
3610b57cec5SDimitry Andric ///    A 128-bit vector of [4 x i32] containing one of the source operands. The
3620b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the upper bits of
3630b57cec5SDimitry Andric ///    the destination.
3640b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
3650b57cec5SDimitry Andric ///    of both operands.
3660b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3670b57cec5SDimitry Andric _mm_hsub_epi32(__m128i __a, __m128i __b)
3680b57cec5SDimitry Andric {
3690b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
3700b57cec5SDimitry Andric }
3710b57cec5SDimitry Andric 
3720b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in 2
3730b57cec5SDimitry Andric ///    packed 64-bit vectors of [4 x i16].
3740b57cec5SDimitry Andric ///
3750b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3760b57cec5SDimitry Andric ///
3770b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHSUBW instruction.
3780b57cec5SDimitry Andric ///
3790b57cec5SDimitry Andric /// \param __a
3800b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
3810b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the lower bits of
3820b57cec5SDimitry Andric ///    the destination.
3830b57cec5SDimitry Andric /// \param __b
3840b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
3850b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the upper bits of
3860b57cec5SDimitry Andric ///    the destination.
3870b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
3880b57cec5SDimitry Andric ///    of both operands.
3890b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
3900b57cec5SDimitry Andric _mm_hsub_pi16(__m64 __a, __m64 __b)
3910b57cec5SDimitry Andric {
3920b57cec5SDimitry Andric     return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
3930b57cec5SDimitry Andric }
3940b57cec5SDimitry Andric 
3950b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in 2
3960b57cec5SDimitry Andric ///    packed 64-bit vectors of [2 x i32].
3970b57cec5SDimitry Andric ///
3980b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
3990b57cec5SDimitry Andric ///
4000b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHSUBD instruction.
4010b57cec5SDimitry Andric ///
4020b57cec5SDimitry Andric /// \param __a
4030b57cec5SDimitry Andric ///    A 64-bit vector of [2 x i32] containing one of the source operands. The
4040b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the lower bits of
4050b57cec5SDimitry Andric ///    the destination.
4060b57cec5SDimitry Andric /// \param __b
4070b57cec5SDimitry Andric ///    A 64-bit vector of [2 x i32] containing one of the source operands. The
4080b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the upper bits of
4090b57cec5SDimitry Andric ///    the destination.
4100b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
4110b57cec5SDimitry Andric ///    of both operands.
4120b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
4130b57cec5SDimitry Andric _mm_hsub_pi32(__m64 __a, __m64 __b)
4140b57cec5SDimitry Andric {
4150b57cec5SDimitry Andric     return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
4160b57cec5SDimitry Andric }
4170b57cec5SDimitry Andric 
418*0fca6ea1SDimitry Andric /// Horizontally subtracts, with saturation, the adjacent pairs of values
419*0fca6ea1SDimitry Andric ///    contained in two packed 128-bit vectors of [8 x i16].
420*0fca6ea1SDimitry Andric ///
421*0fca6ea1SDimitry Andric ///    Positive differences greater than 0x7FFF are saturated to 0x7FFF.
422*0fca6ea1SDimitry Andric ///    Negative differences less than 0x8000 are saturated to 0x8000.
4230b57cec5SDimitry Andric ///
4240b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4250b57cec5SDimitry Andric ///
4260b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHSUBSW instruction.
4270b57cec5SDimitry Andric ///
4280b57cec5SDimitry Andric /// \param __a
4290b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
4300b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the lower bits of
4310b57cec5SDimitry Andric ///    the destination.
4320b57cec5SDimitry Andric /// \param __b
4330b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
4340b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the upper bits of
4350b57cec5SDimitry Andric ///    the destination.
4360b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
4370b57cec5SDimitry Andric ///    differences of both operands.
4380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4390b57cec5SDimitry Andric _mm_hsubs_epi16(__m128i __a, __m128i __b)
4400b57cec5SDimitry Andric {
4410b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
4420b57cec5SDimitry Andric }
4430b57cec5SDimitry Andric 
444*0fca6ea1SDimitry Andric /// Horizontally subtracts, with saturation, the adjacent pairs of values
445*0fca6ea1SDimitry Andric ///    contained in two packed 64-bit vectors of [4 x i16].
446*0fca6ea1SDimitry Andric ///
447*0fca6ea1SDimitry Andric ///    Positive differences greater than 0x7FFF are saturated to 0x7FFF.
448*0fca6ea1SDimitry Andric ///    Negative differences less than 0x8000 are saturated to 0x8000.
4490b57cec5SDimitry Andric ///
4500b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4510b57cec5SDimitry Andric ///
4520b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHSUBSW instruction.
4530b57cec5SDimitry Andric ///
4540b57cec5SDimitry Andric /// \param __a
4550b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
4560b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the lower bits of
4570b57cec5SDimitry Andric ///    the destination.
4580b57cec5SDimitry Andric /// \param __b
4590b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
4600b57cec5SDimitry Andric ///    horizontal differences between the values are stored in the upper bits of
4610b57cec5SDimitry Andric ///    the destination.
4620b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
4630b57cec5SDimitry Andric ///    differences of both operands.
4640b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
4650b57cec5SDimitry Andric _mm_hsubs_pi16(__m64 __a, __m64 __b)
4660b57cec5SDimitry Andric {
4670b57cec5SDimitry Andric     return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
4680b57cec5SDimitry Andric }
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric /// Multiplies corresponding pairs of packed 8-bit unsigned integer
4710b57cec5SDimitry Andric ///    values contained in the first source operand and packed 8-bit signed
4720b57cec5SDimitry Andric ///    integer values contained in the second source operand, adds pairs of
4730b57cec5SDimitry Andric ///    contiguous products with signed saturation, and writes the 16-bit sums to
4740b57cec5SDimitry Andric ///    the corresponding bits in the destination.
4750b57cec5SDimitry Andric ///
4760b57cec5SDimitry Andric ///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
4770b57cec5SDimitry Andric ///    both operands are multiplied, and the sum of both results is written to
4780b57cec5SDimitry Andric ///    bits [15:0] of the destination.
4790b57cec5SDimitry Andric ///
4800b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
4810b57cec5SDimitry Andric ///
4820b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
4830b57cec5SDimitry Andric ///
4840b57cec5SDimitry Andric /// \param __a
4850b57cec5SDimitry Andric ///    A 128-bit integer vector containing the first source operand.
4860b57cec5SDimitry Andric /// \param __b
4870b57cec5SDimitry Andric ///    A 128-bit integer vector containing the second source operand.
4880b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the sums of products of both
4890b57cec5SDimitry Andric ///    operands: \n
4900b57cec5SDimitry Andric ///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
4910b57cec5SDimitry Andric ///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
4920b57cec5SDimitry Andric ///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
4930b57cec5SDimitry Andric ///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
4940b57cec5SDimitry Andric ///    \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
4950b57cec5SDimitry Andric ///    \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
4960b57cec5SDimitry Andric ///    \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
4970b57cec5SDimitry Andric ///    \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
4980b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
4990b57cec5SDimitry Andric _mm_maddubs_epi16(__m128i __a, __m128i __b)
5000b57cec5SDimitry Andric {
5010b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
5020b57cec5SDimitry Andric }
5030b57cec5SDimitry Andric 
5040b57cec5SDimitry Andric /// Multiplies corresponding pairs of packed 8-bit unsigned integer
5050b57cec5SDimitry Andric ///    values contained in the first source operand and packed 8-bit signed
5060b57cec5SDimitry Andric ///    integer values contained in the second source operand, adds pairs of
5070b57cec5SDimitry Andric ///    contiguous products with signed saturation, and writes the 16-bit sums to
5080b57cec5SDimitry Andric ///    the corresponding bits in the destination.
5090b57cec5SDimitry Andric ///
5100b57cec5SDimitry Andric ///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
5110b57cec5SDimitry Andric ///    both operands are multiplied, and the sum of both results is written to
5120b57cec5SDimitry Andric ///    bits [15:0] of the destination.
5130b57cec5SDimitry Andric ///
5140b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5150b57cec5SDimitry Andric ///
5160b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PMADDUBSW instruction.
5170b57cec5SDimitry Andric ///
5180b57cec5SDimitry Andric /// \param __a
5190b57cec5SDimitry Andric ///    A 64-bit integer vector containing the first source operand.
5200b57cec5SDimitry Andric /// \param __b
5210b57cec5SDimitry Andric ///    A 64-bit integer vector containing the second source operand.
5220b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the sums of products of both
5230b57cec5SDimitry Andric ///    operands: \n
5240b57cec5SDimitry Andric ///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
5250b57cec5SDimitry Andric ///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
5260b57cec5SDimitry Andric ///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
5270b57cec5SDimitry Andric ///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
5280b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
5290b57cec5SDimitry Andric _mm_maddubs_pi16(__m64 __a, __m64 __b)
5300b57cec5SDimitry Andric {
5310b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
5320b57cec5SDimitry Andric }
5330b57cec5SDimitry Andric 
5340b57cec5SDimitry Andric /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
5350b57cec5SDimitry Andric ///    products to the 18 most significant bits by right-shifting, rounds the
5360b57cec5SDimitry Andric ///    truncated value by adding 1, and writes bits [16:1] to the destination.
5370b57cec5SDimitry Andric ///
5380b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5390b57cec5SDimitry Andric ///
5400b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPMULHRSW instruction.
5410b57cec5SDimitry Andric ///
5420b57cec5SDimitry Andric /// \param __a
5430b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands.
5440b57cec5SDimitry Andric /// \param __b
5450b57cec5SDimitry Andric ///    A 128-bit vector of [8 x i16] containing one of the source operands.
5460b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
5470b57cec5SDimitry Andric ///    products of both operands.
5480b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
5490b57cec5SDimitry Andric _mm_mulhrs_epi16(__m128i __a, __m128i __b)
5500b57cec5SDimitry Andric {
5510b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
5520b57cec5SDimitry Andric }
5530b57cec5SDimitry Andric 
5540b57cec5SDimitry Andric /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
5550b57cec5SDimitry Andric ///    products to the 18 most significant bits by right-shifting, rounds the
5560b57cec5SDimitry Andric ///    truncated value by adding 1, and writes bits [16:1] to the destination.
5570b57cec5SDimitry Andric ///
5580b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5590b57cec5SDimitry Andric ///
5600b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PMULHRSW instruction.
5610b57cec5SDimitry Andric ///
5620b57cec5SDimitry Andric /// \param __a
5630b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands.
5640b57cec5SDimitry Andric /// \param __b
5650b57cec5SDimitry Andric ///    A 64-bit vector of [4 x i16] containing one of the source operands.
5660b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
5670b57cec5SDimitry Andric ///    products of both operands.
5680b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
5690b57cec5SDimitry Andric _mm_mulhrs_pi16(__m64 __a, __m64 __b)
5700b57cec5SDimitry Andric {
5710b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
5720b57cec5SDimitry Andric }
5730b57cec5SDimitry Andric 
5740b57cec5SDimitry Andric /// Copies the 8-bit integers from a 128-bit integer vector to the
5750b57cec5SDimitry Andric ///    destination or clears 8-bit values in the destination, as specified by
5760b57cec5SDimitry Andric ///    the second source operand.
5770b57cec5SDimitry Andric ///
5780b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
5790b57cec5SDimitry Andric ///
5800b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPSHUFB instruction.
5810b57cec5SDimitry Andric ///
5820b57cec5SDimitry Andric /// \param __a
5830b57cec5SDimitry Andric ///    A 128-bit integer vector containing the values to be copied.
5840b57cec5SDimitry Andric /// \param __b
5850b57cec5SDimitry Andric ///    A 128-bit integer vector containing control bytes corresponding to
5860b57cec5SDimitry Andric ///    positions in the destination:
5870b57cec5SDimitry Andric ///    Bit 7: \n
5880b57cec5SDimitry Andric ///    1: Clear the corresponding byte in the destination. \n
5890b57cec5SDimitry Andric ///    0: Copy the selected source byte to the corresponding byte in the
5900b57cec5SDimitry Andric ///    destination. \n
5910b57cec5SDimitry Andric ///    Bits [6:4] Reserved.  \n
5920b57cec5SDimitry Andric ///    Bits [3:0] select the source byte to be copied.
5930b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the copied or cleared values.
5940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
5950b57cec5SDimitry Andric _mm_shuffle_epi8(__m128i __a, __m128i __b)
5960b57cec5SDimitry Andric {
5970b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
5980b57cec5SDimitry Andric }
5990b57cec5SDimitry Andric 
6000b57cec5SDimitry Andric /// Copies the 8-bit integers from a 64-bit integer vector to the
6010b57cec5SDimitry Andric ///    destination or clears 8-bit values in the destination, as specified by
6020b57cec5SDimitry Andric ///    the second source operand.
6030b57cec5SDimitry Andric ///
6040b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6050b57cec5SDimitry Andric ///
6060b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PSHUFB instruction.
6070b57cec5SDimitry Andric ///
6080b57cec5SDimitry Andric /// \param __a
6090b57cec5SDimitry Andric ///    A 64-bit integer vector containing the values to be copied.
6100b57cec5SDimitry Andric /// \param __b
6110b57cec5SDimitry Andric ///    A 64-bit integer vector containing control bytes corresponding to
6120b57cec5SDimitry Andric ///    positions in the destination:
6130b57cec5SDimitry Andric ///    Bit 7: \n
6140b57cec5SDimitry Andric ///    1: Clear the corresponding byte in the destination. \n
6150b57cec5SDimitry Andric ///    0: Copy the selected source byte to the corresponding byte in the
6160b57cec5SDimitry Andric ///    destination. \n
6170b57cec5SDimitry Andric ///    Bits [3:0] select the source byte to be copied.
6180b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the copied or cleared values.
6190b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
6200b57cec5SDimitry Andric _mm_shuffle_pi8(__m64 __a, __m64 __b)
6210b57cec5SDimitry Andric {
6220b57cec5SDimitry Andric     return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
6230b57cec5SDimitry Andric }
6240b57cec5SDimitry Andric 
6250b57cec5SDimitry Andric /// For each 8-bit integer in the first source operand, perform one of
6260b57cec5SDimitry Andric ///    the following actions as specified by the second source operand.
6270b57cec5SDimitry Andric ///
6280b57cec5SDimitry Andric ///    If the byte in the second source is negative, calculate the two's
6290b57cec5SDimitry Andric ///    complement of the corresponding byte in the first source, and write that
6300b57cec5SDimitry Andric ///    value to the destination. If the byte in the second source is positive,
6310b57cec5SDimitry Andric ///    copy the corresponding byte from the first source to the destination. If
6320b57cec5SDimitry Andric ///    the byte in the second source is zero, clear the corresponding byte in
6330b57cec5SDimitry Andric ///    the destination.
6340b57cec5SDimitry Andric ///
6350b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6360b57cec5SDimitry Andric ///
6370b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPSIGNB instruction.
6380b57cec5SDimitry Andric ///
6390b57cec5SDimitry Andric /// \param __a
6400b57cec5SDimitry Andric ///    A 128-bit integer vector containing the values to be copied.
6410b57cec5SDimitry Andric /// \param __b
6420b57cec5SDimitry Andric ///    A 128-bit integer vector containing control bytes corresponding to
6430b57cec5SDimitry Andric ///    positions in the destination.
6440b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the resultant values.
6450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
6460b57cec5SDimitry Andric _mm_sign_epi8(__m128i __a, __m128i __b)
6470b57cec5SDimitry Andric {
6480b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
6490b57cec5SDimitry Andric }
6500b57cec5SDimitry Andric 
6510b57cec5SDimitry Andric /// For each 16-bit integer in the first source operand, perform one of
6520b57cec5SDimitry Andric ///    the following actions as specified by the second source operand.
6530b57cec5SDimitry Andric ///
6540b57cec5SDimitry Andric ///    If the word in the second source is negative, calculate the two's
6550b57cec5SDimitry Andric ///    complement of the corresponding word in the first source, and write that
6560b57cec5SDimitry Andric ///    value to the destination. If the word in the second source is positive,
6570b57cec5SDimitry Andric ///    copy the corresponding word from the first source to the destination. If
6580b57cec5SDimitry Andric ///    the word in the second source is zero, clear the corresponding word in
6590b57cec5SDimitry Andric ///    the destination.
6600b57cec5SDimitry Andric ///
6610b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6620b57cec5SDimitry Andric ///
6630b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPSIGNW instruction.
6640b57cec5SDimitry Andric ///
6650b57cec5SDimitry Andric /// \param __a
6660b57cec5SDimitry Andric ///    A 128-bit integer vector containing the values to be copied.
6670b57cec5SDimitry Andric /// \param __b
6680b57cec5SDimitry Andric ///    A 128-bit integer vector containing control words corresponding to
6690b57cec5SDimitry Andric ///    positions in the destination.
6700b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the resultant values.
6710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
6720b57cec5SDimitry Andric _mm_sign_epi16(__m128i __a, __m128i __b)
6730b57cec5SDimitry Andric {
6740b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
6750b57cec5SDimitry Andric }
6760b57cec5SDimitry Andric 
6770b57cec5SDimitry Andric /// For each 32-bit integer in the first source operand, perform one of
6780b57cec5SDimitry Andric ///    the following actions as specified by the second source operand.
6790b57cec5SDimitry Andric ///
6800b57cec5SDimitry Andric ///    If the doubleword in the second source is negative, calculate the two's
6810b57cec5SDimitry Andric ///    complement of the corresponding word in the first source, and write that
6820b57cec5SDimitry Andric ///    value to the destination. If the doubleword in the second source is
6830b57cec5SDimitry Andric ///    positive, copy the corresponding word from the first source to the
6840b57cec5SDimitry Andric ///    destination. If the doubleword in the second source is zero, clear the
6850b57cec5SDimitry Andric ///    corresponding word in the destination.
6860b57cec5SDimitry Andric ///
6870b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
6880b57cec5SDimitry Andric ///
6890b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPSIGND instruction.
6900b57cec5SDimitry Andric ///
6910b57cec5SDimitry Andric /// \param __a
6920b57cec5SDimitry Andric ///    A 128-bit integer vector containing the values to be copied.
6930b57cec5SDimitry Andric /// \param __b
6940b57cec5SDimitry Andric ///    A 128-bit integer vector containing control doublewords corresponding to
6950b57cec5SDimitry Andric ///    positions in the destination.
6960b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the resultant values.
6970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
6980b57cec5SDimitry Andric _mm_sign_epi32(__m128i __a, __m128i __b)
6990b57cec5SDimitry Andric {
7000b57cec5SDimitry Andric     return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
7010b57cec5SDimitry Andric }
7020b57cec5SDimitry Andric 
7030b57cec5SDimitry Andric /// For each 8-bit integer in the first source operand, perform one of
7040b57cec5SDimitry Andric ///    the following actions as specified by the second source operand.
7050b57cec5SDimitry Andric ///
7060b57cec5SDimitry Andric ///    If the byte in the second source is negative, calculate the two's
7070b57cec5SDimitry Andric ///    complement of the corresponding byte in the first source, and write that
7080b57cec5SDimitry Andric ///    value to the destination. If the byte in the second source is positive,
7090b57cec5SDimitry Andric ///    copy the corresponding byte from the first source to the destination. If
7100b57cec5SDimitry Andric ///    the byte in the second source is zero, clear the corresponding byte in
7110b57cec5SDimitry Andric ///    the destination.
7120b57cec5SDimitry Andric ///
7130b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
7140b57cec5SDimitry Andric ///
7150b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PSIGNB instruction.
7160b57cec5SDimitry Andric ///
7170b57cec5SDimitry Andric /// \param __a
7180b57cec5SDimitry Andric ///    A 64-bit integer vector containing the values to be copied.
7190b57cec5SDimitry Andric /// \param __b
7200b57cec5SDimitry Andric ///    A 64-bit integer vector containing control bytes corresponding to
7210b57cec5SDimitry Andric ///    positions in the destination.
7220b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the resultant values.
7230b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
7240b57cec5SDimitry Andric _mm_sign_pi8(__m64 __a, __m64 __b)
7250b57cec5SDimitry Andric {
7260b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
7270b57cec5SDimitry Andric }
7280b57cec5SDimitry Andric 
7290b57cec5SDimitry Andric /// For each 16-bit integer in the first source operand, perform one of
7300b57cec5SDimitry Andric ///    the following actions as specified by the second source operand.
7310b57cec5SDimitry Andric ///
7320b57cec5SDimitry Andric ///    If the word in the second source is negative, calculate the two's
7330b57cec5SDimitry Andric ///    complement of the corresponding word in the first source, and write that
7340b57cec5SDimitry Andric ///    value to the destination. If the word in the second source is positive,
7350b57cec5SDimitry Andric ///    copy the corresponding word from the first source to the destination. If
7360b57cec5SDimitry Andric ///    the word in the second source is zero, clear the corresponding word in
7370b57cec5SDimitry Andric ///    the destination.
7380b57cec5SDimitry Andric ///
7390b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
7400b57cec5SDimitry Andric ///
7410b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PSIGNW instruction.
7420b57cec5SDimitry Andric ///
7430b57cec5SDimitry Andric /// \param __a
7440b57cec5SDimitry Andric ///    A 64-bit integer vector containing the values to be copied.
7450b57cec5SDimitry Andric /// \param __b
7460b57cec5SDimitry Andric ///    A 64-bit integer vector containing control words corresponding to
7470b57cec5SDimitry Andric ///    positions in the destination.
7480b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the resultant values.
7490b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
7500b57cec5SDimitry Andric _mm_sign_pi16(__m64 __a, __m64 __b)
7510b57cec5SDimitry Andric {
7520b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
7530b57cec5SDimitry Andric }
7540b57cec5SDimitry Andric 
7550b57cec5SDimitry Andric /// For each 32-bit integer in the first source operand, perform one of
7560b57cec5SDimitry Andric ///    the following actions as specified by the second source operand.
7570b57cec5SDimitry Andric ///
7580b57cec5SDimitry Andric ///    If the doubleword in the second source is negative, calculate the two's
7590b57cec5SDimitry Andric ///    complement of the corresponding doubleword in the first source, and
7600b57cec5SDimitry Andric ///    write that value to the destination. If the doubleword in the second
7610b57cec5SDimitry Andric ///    source is positive, copy the corresponding doubleword from the first
7620b57cec5SDimitry Andric ///    source to the destination. If the doubleword in the second source is
7630b57cec5SDimitry Andric ///    zero, clear the corresponding doubleword in the destination.
7640b57cec5SDimitry Andric ///
7650b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
7660b57cec5SDimitry Andric ///
7670b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PSIGND instruction.
7680b57cec5SDimitry Andric ///
7690b57cec5SDimitry Andric /// \param __a
7700b57cec5SDimitry Andric ///    A 64-bit integer vector containing the values to be copied.
7710b57cec5SDimitry Andric /// \param __b
7720b57cec5SDimitry Andric ///    A 64-bit integer vector containing two control doublewords corresponding
7730b57cec5SDimitry Andric ///    to positions in the destination.
7740b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the resultant values.
7750b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
7760b57cec5SDimitry Andric _mm_sign_pi32(__m64 __a, __m64 __b)
7770b57cec5SDimitry Andric {
7780b57cec5SDimitry Andric     return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
7790b57cec5SDimitry Andric }
7800b57cec5SDimitry Andric 
7810b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS
7820b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS_MMX
7830b57cec5SDimitry Andric 
7840b57cec5SDimitry Andric #endif /* __TMMINTRIN_H */
785