10b57cec5SDimitry Andric /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __TMMINTRIN_H 110b57cec5SDimitry Andric #define __TMMINTRIN_H 120b57cec5SDimitry Andric 13349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__) 14349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture" 15349cc55cSDimitry Andric #endif 16349cc55cSDimitry Andric 170b57cec5SDimitry Andric #include <pmmintrin.h> 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 205f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS \ 215f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 225f757f3fSDimitry Andric __target__("ssse3,no-evex512"), __min_vector_width__(64))) 235f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS_MMX \ 245f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 255f757f3fSDimitry Andric __target__("mmx,ssse3,no-evex512"), \ 265f757f3fSDimitry Andric __min_vector_width__(64))) 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 8-bit signed 290b57cec5SDimitry Andric /// integers in the source operand and stores the 8-bit unsigned integer 300b57cec5SDimitry Andric /// results in the destination. 
310b57cec5SDimitry Andric /// 320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 330b57cec5SDimitry Andric /// 340b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PABSB instruction. 350b57cec5SDimitry Andric /// 360b57cec5SDimitry Andric /// \param __a 370b57cec5SDimitry Andric /// A 64-bit vector of [8 x i8]. 380b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the absolute values of the 390b57cec5SDimitry Andric /// elements in the operand. 400b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 410b57cec5SDimitry Andric _mm_abs_pi8(__m64 __a) 420b57cec5SDimitry Andric { 430b57cec5SDimitry Andric return (__m64)__builtin_ia32_pabsb((__v8qi)__a); 440b57cec5SDimitry Andric } 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 8-bit signed 470b57cec5SDimitry Andric /// integers in the source operand and stores the 8-bit unsigned integer 480b57cec5SDimitry Andric /// results in the destination. 490b57cec5SDimitry Andric /// 500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 510b57cec5SDimitry Andric /// 520b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPABSB instruction. 530b57cec5SDimitry Andric /// 540b57cec5SDimitry Andric /// \param __a 550b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 560b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the absolute values of the 570b57cec5SDimitry Andric /// elements in the operand. 
580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 590b57cec5SDimitry Andric _mm_abs_epi8(__m128i __a) 600b57cec5SDimitry Andric { 6104eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_abs((__v16qs)__a); 620b57cec5SDimitry Andric } 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 16-bit signed 650b57cec5SDimitry Andric /// integers in the source operand and stores the 16-bit unsigned integer 660b57cec5SDimitry Andric /// results in the destination. 670b57cec5SDimitry Andric /// 680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 690b57cec5SDimitry Andric /// 700b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PABSW instruction. 710b57cec5SDimitry Andric /// 720b57cec5SDimitry Andric /// \param __a 730b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16]. 740b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the absolute values of the 750b57cec5SDimitry Andric /// elements in the operand. 760b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 770b57cec5SDimitry Andric _mm_abs_pi16(__m64 __a) 780b57cec5SDimitry Andric { 790b57cec5SDimitry Andric return (__m64)__builtin_ia32_pabsw((__v4hi)__a); 800b57cec5SDimitry Andric } 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 16-bit signed 830b57cec5SDimitry Andric /// integers in the source operand and stores the 16-bit unsigned integer 840b57cec5SDimitry Andric /// results in the destination. 850b57cec5SDimitry Andric /// 860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 870b57cec5SDimitry Andric /// 880b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPABSW instruction. 890b57cec5SDimitry Andric /// 900b57cec5SDimitry Andric /// \param __a 910b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 
920b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the absolute values of the 930b57cec5SDimitry Andric /// elements in the operand. 940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 950b57cec5SDimitry Andric _mm_abs_epi16(__m128i __a) 960b57cec5SDimitry Andric { 9704eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_abs((__v8hi)__a); 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 32-bit signed 1010b57cec5SDimitry Andric /// integers in the source operand and stores the 32-bit unsigned integer 1020b57cec5SDimitry Andric /// results in the destination. 1030b57cec5SDimitry Andric /// 1040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1050b57cec5SDimitry Andric /// 1060b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PABSD instruction. 1070b57cec5SDimitry Andric /// 1080b57cec5SDimitry Andric /// \param __a 1090b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32]. 1100b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the absolute values of the 1110b57cec5SDimitry Andric /// elements in the operand. 1120b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 1130b57cec5SDimitry Andric _mm_abs_pi32(__m64 __a) 1140b57cec5SDimitry Andric { 1150b57cec5SDimitry Andric return (__m64)__builtin_ia32_pabsd((__v2si)__a); 1160b57cec5SDimitry Andric } 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric /// Computes the absolute value of each of the packed 32-bit signed 1190b57cec5SDimitry Andric /// integers in the source operand and stores the 32-bit unsigned integer 1200b57cec5SDimitry Andric /// results in the destination. 1210b57cec5SDimitry Andric /// 1220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1230b57cec5SDimitry Andric /// 1240b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPABSD instruction. 
1250b57cec5SDimitry Andric /// 1260b57cec5SDimitry Andric /// \param __a 1270b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 1280b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the absolute values of the 1290b57cec5SDimitry Andric /// elements in the operand. 1300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 1310b57cec5SDimitry Andric _mm_abs_epi32(__m128i __a) 1320b57cec5SDimitry Andric { 13304eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_abs((__v4si)__a); 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric /// Concatenates the two 128-bit integer vector operands, and 1370b57cec5SDimitry Andric /// right-shifts the result by the number of bytes specified in the immediate 1380b57cec5SDimitry Andric /// operand. 1390b57cec5SDimitry Andric /// 1400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1410b57cec5SDimitry Andric /// 1420b57cec5SDimitry Andric /// \code 1430b57cec5SDimitry Andric /// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n); 1440b57cec5SDimitry Andric /// \endcode 1450b57cec5SDimitry Andric /// 1460b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PALIGNR instruction. 1470b57cec5SDimitry Andric /// 1480b57cec5SDimitry Andric /// \param a 1490b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8] containing one of the source operands. 1500b57cec5SDimitry Andric /// \param b 1510b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8] containing one of the source operands. 1520b57cec5SDimitry Andric /// \param n 1530b57cec5SDimitry Andric /// An immediate operand specifying how many bytes to right-shift the result. 1540b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the concatenated right-shifted 1550b57cec5SDimitry Andric /// value. 
#define _mm_alignr_epi8(a, b, n)                                               \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a),                   \
                                      (__v16qi)(__m128i)(b), (n)))

/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_pi8(a, b, n)                                                \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))

/// Horizontally adds the adjacent pairs of values contained in 2 packed
///    128-bit vectors of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHADDW instruction.
1880b57cec5SDimitry Andric /// 1890b57cec5SDimitry Andric /// \param __a 1900b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. The 1910b57cec5SDimitry Andric /// horizontal sums of the values are stored in the lower bits of the 1920b57cec5SDimitry Andric /// destination. 1930b57cec5SDimitry Andric /// \param __b 1940b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. The 1950b57cec5SDimitry Andric /// horizontal sums of the values are stored in the upper bits of the 1960b57cec5SDimitry Andric /// destination. 1970b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of 1980b57cec5SDimitry Andric /// both operands. 1990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2000b57cec5SDimitry Andric _mm_hadd_epi16(__m128i __a, __m128i __b) 2010b57cec5SDimitry Andric { 2020b57cec5SDimitry Andric return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric /// Horizontally adds the adjacent pairs of values contained in 2 packed 2060b57cec5SDimitry Andric /// 128-bit vectors of [4 x i32]. 2070b57cec5SDimitry Andric /// 2080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2090b57cec5SDimitry Andric /// 2100b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHADDD instruction. 2110b57cec5SDimitry Andric /// 2120b57cec5SDimitry Andric /// \param __a 2130b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32] containing one of the source operands. The 2140b57cec5SDimitry Andric /// horizontal sums of the values are stored in the lower bits of the 2150b57cec5SDimitry Andric /// destination. 2160b57cec5SDimitry Andric /// \param __b 2170b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32] containing one of the source operands. 
The 2180b57cec5SDimitry Andric /// horizontal sums of the values are stored in the upper bits of the 2190b57cec5SDimitry Andric /// destination. 2200b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of 2210b57cec5SDimitry Andric /// both operands. 2220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2230b57cec5SDimitry Andric _mm_hadd_epi32(__m128i __a, __m128i __b) 2240b57cec5SDimitry Andric { 2250b57cec5SDimitry Andric return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric /// Horizontally adds the adjacent pairs of values contained in 2 packed 2290b57cec5SDimitry Andric /// 64-bit vectors of [4 x i16]. 2300b57cec5SDimitry Andric /// 2310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2320b57cec5SDimitry Andric /// 2330b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHADDW instruction. 2340b57cec5SDimitry Andric /// 2350b57cec5SDimitry Andric /// \param __a 2360b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. The 2370b57cec5SDimitry Andric /// horizontal sums of the values are stored in the lower bits of the 2380b57cec5SDimitry Andric /// destination. 2390b57cec5SDimitry Andric /// \param __b 2400b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. The 2410b57cec5SDimitry Andric /// horizontal sums of the values are stored in the upper bits of the 2420b57cec5SDimitry Andric /// destination. 2430b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both 2440b57cec5SDimitry Andric /// operands. 
2450b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 2460b57cec5SDimitry Andric _mm_hadd_pi16(__m64 __a, __m64 __b) 2470b57cec5SDimitry Andric { 2480b57cec5SDimitry Andric return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); 2490b57cec5SDimitry Andric } 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric /// Horizontally adds the adjacent pairs of values contained in 2 packed 2520b57cec5SDimitry Andric /// 64-bit vectors of [2 x i32]. 2530b57cec5SDimitry Andric /// 2540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2550b57cec5SDimitry Andric /// 2560b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHADDD instruction. 2570b57cec5SDimitry Andric /// 2580b57cec5SDimitry Andric /// \param __a 2590b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32] containing one of the source operands. The 2600b57cec5SDimitry Andric /// horizontal sums of the values are stored in the lower bits of the 2610b57cec5SDimitry Andric /// destination. 2620b57cec5SDimitry Andric /// \param __b 2630b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32] containing one of the source operands. The 2640b57cec5SDimitry Andric /// horizontal sums of the values are stored in the upper bits of the 2650b57cec5SDimitry Andric /// destination. 2660b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both 2670b57cec5SDimitry Andric /// operands. 2680b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 2690b57cec5SDimitry Andric _mm_hadd_pi32(__m64 __a, __m64 __b) 2700b57cec5SDimitry Andric { 2710b57cec5SDimitry Andric return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); 2720b57cec5SDimitry Andric } 2730b57cec5SDimitry Andric 274*0fca6ea1SDimitry Andric /// Horizontally adds, with saturation, the adjacent pairs of values contained 275*0fca6ea1SDimitry Andric /// in two packed 128-bit vectors of [8 x i16]. 
276*0fca6ea1SDimitry Andric /// 277*0fca6ea1SDimitry Andric /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums 278*0fca6ea1SDimitry Andric /// less than 0x8000 are saturated to 0x8000. 2790b57cec5SDimitry Andric /// 2800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2810b57cec5SDimitry Andric /// 2820b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHADDSW instruction. 2830b57cec5SDimitry Andric /// 2840b57cec5SDimitry Andric /// \param __a 2850b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. The 2860b57cec5SDimitry Andric /// horizontal sums of the values are stored in the lower bits of the 2870b57cec5SDimitry Andric /// destination. 2880b57cec5SDimitry Andric /// \param __b 2890b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. The 2900b57cec5SDimitry Andric /// horizontal sums of the values are stored in the upper bits of the 2910b57cec5SDimitry Andric /// destination. 2920b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 2930b57cec5SDimitry Andric /// sums of both operands. 2940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 2950b57cec5SDimitry Andric _mm_hadds_epi16(__m128i __a, __m128i __b) 2960b57cec5SDimitry Andric { 2970b57cec5SDimitry Andric return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); 2980b57cec5SDimitry Andric } 2990b57cec5SDimitry Andric 300*0fca6ea1SDimitry Andric /// Horizontally adds, with saturation, the adjacent pairs of values contained 301*0fca6ea1SDimitry Andric /// in two packed 64-bit vectors of [4 x i16]. 302*0fca6ea1SDimitry Andric /// 303*0fca6ea1SDimitry Andric /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums 304*0fca6ea1SDimitry Andric /// less than 0x8000 are saturated to 0x8000. 
3050b57cec5SDimitry Andric /// 3060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3070b57cec5SDimitry Andric /// 3080b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHADDSW instruction. 3090b57cec5SDimitry Andric /// 3100b57cec5SDimitry Andric /// \param __a 3110b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. The 3120b57cec5SDimitry Andric /// horizontal sums of the values are stored in the lower bits of the 3130b57cec5SDimitry Andric /// destination. 3140b57cec5SDimitry Andric /// \param __b 3150b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. The 3160b57cec5SDimitry Andric /// horizontal sums of the values are stored in the upper bits of the 3170b57cec5SDimitry Andric /// destination. 3180b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 3190b57cec5SDimitry Andric /// sums of both operands. 3200b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 3210b57cec5SDimitry Andric _mm_hadds_pi16(__m64 __a, __m64 __b) 3220b57cec5SDimitry Andric { 3230b57cec5SDimitry Andric return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); 3240b57cec5SDimitry Andric } 3250b57cec5SDimitry Andric 3260b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in 2 3270b57cec5SDimitry Andric /// packed 128-bit vectors of [8 x i16]. 3280b57cec5SDimitry Andric /// 3290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3300b57cec5SDimitry Andric /// 3310b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHSUBW instruction. 3320b57cec5SDimitry Andric /// 3330b57cec5SDimitry Andric /// \param __a 3340b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. The 3350b57cec5SDimitry Andric /// horizontal differences between the values are stored in the lower bits of 3360b57cec5SDimitry Andric /// the destination. 
3370b57cec5SDimitry Andric /// \param __b 3380b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. The 3390b57cec5SDimitry Andric /// horizontal differences between the values are stored in the upper bits of 3400b57cec5SDimitry Andric /// the destination. 3410b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences 3420b57cec5SDimitry Andric /// of both operands. 3430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3440b57cec5SDimitry Andric _mm_hsub_epi16(__m128i __a, __m128i __b) 3450b57cec5SDimitry Andric { 3460b57cec5SDimitry Andric return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in 2 3500b57cec5SDimitry Andric /// packed 128-bit vectors of [4 x i32]. 3510b57cec5SDimitry Andric /// 3520b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3530b57cec5SDimitry Andric /// 3540b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHSUBD instruction. 3550b57cec5SDimitry Andric /// 3560b57cec5SDimitry Andric /// \param __a 3570b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32] containing one of the source operands. The 3580b57cec5SDimitry Andric /// horizontal differences between the values are stored in the lower bits of 3590b57cec5SDimitry Andric /// the destination. 3600b57cec5SDimitry Andric /// \param __b 3610b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32] containing one of the source operands. The 3620b57cec5SDimitry Andric /// horizontal differences between the values are stored in the upper bits of 3630b57cec5SDimitry Andric /// the destination. 3640b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences 3650b57cec5SDimitry Andric /// of both operands. 
3660b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3670b57cec5SDimitry Andric _mm_hsub_epi32(__m128i __a, __m128i __b) 3680b57cec5SDimitry Andric { 3690b57cec5SDimitry Andric return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); 3700b57cec5SDimitry Andric } 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in 2 3730b57cec5SDimitry Andric /// packed 64-bit vectors of [4 x i16]. 3740b57cec5SDimitry Andric /// 3750b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3760b57cec5SDimitry Andric /// 3770b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHSUBW instruction. 3780b57cec5SDimitry Andric /// 3790b57cec5SDimitry Andric /// \param __a 3800b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. The 3810b57cec5SDimitry Andric /// horizontal differences between the values are stored in the lower bits of 3820b57cec5SDimitry Andric /// the destination. 3830b57cec5SDimitry Andric /// \param __b 3840b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. The 3850b57cec5SDimitry Andric /// horizontal differences between the values are stored in the upper bits of 3860b57cec5SDimitry Andric /// the destination. 3870b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences 3880b57cec5SDimitry Andric /// of both operands. 3890b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 3900b57cec5SDimitry Andric _mm_hsub_pi16(__m64 __a, __m64 __b) 3910b57cec5SDimitry Andric { 3920b57cec5SDimitry Andric return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in 2 3960b57cec5SDimitry Andric /// packed 64-bit vectors of [2 x i32]. 
3970b57cec5SDimitry Andric /// 3980b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3990b57cec5SDimitry Andric /// 4000b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHSUBD instruction. 4010b57cec5SDimitry Andric /// 4020b57cec5SDimitry Andric /// \param __a 4030b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32] containing one of the source operands. The 4040b57cec5SDimitry Andric /// horizontal differences between the values are stored in the lower bits of 4050b57cec5SDimitry Andric /// the destination. 4060b57cec5SDimitry Andric /// \param __b 4070b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32] containing one of the source operands. The 4080b57cec5SDimitry Andric /// horizontal differences between the values are stored in the upper bits of 4090b57cec5SDimitry Andric /// the destination. 4100b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences 4110b57cec5SDimitry Andric /// of both operands. 4120b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 4130b57cec5SDimitry Andric _mm_hsub_pi32(__m64 __a, __m64 __b) 4140b57cec5SDimitry Andric { 4150b57cec5SDimitry Andric return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 418*0fca6ea1SDimitry Andric /// Horizontally subtracts, with saturation, the adjacent pairs of values 419*0fca6ea1SDimitry Andric /// contained in two packed 128-bit vectors of [8 x i16]. 420*0fca6ea1SDimitry Andric /// 421*0fca6ea1SDimitry Andric /// Positive differences greater than 0x7FFF are saturated to 0x7FFF. 422*0fca6ea1SDimitry Andric /// Negative differences less than 0x8000 are saturated to 0x8000. 4230b57cec5SDimitry Andric /// 4240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4250b57cec5SDimitry Andric /// 4260b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPHSUBSW instruction. 
4270b57cec5SDimitry Andric /// 4280b57cec5SDimitry Andric /// \param __a 4290b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. The 4300b57cec5SDimitry Andric /// horizontal differences between the values are stored in the lower bits of 4310b57cec5SDimitry Andric /// the destination. 4320b57cec5SDimitry Andric /// \param __b 4330b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. The 4340b57cec5SDimitry Andric /// horizontal differences between the values are stored in the upper bits of 4350b57cec5SDimitry Andric /// the destination. 4360b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 4370b57cec5SDimitry Andric /// differences of both operands. 4380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4390b57cec5SDimitry Andric _mm_hsubs_epi16(__m128i __a, __m128i __b) 4400b57cec5SDimitry Andric { 4410b57cec5SDimitry Andric return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); 4420b57cec5SDimitry Andric } 4430b57cec5SDimitry Andric 444*0fca6ea1SDimitry Andric /// Horizontally subtracts, with saturation, the adjacent pairs of values 445*0fca6ea1SDimitry Andric /// contained in two packed 64-bit vectors of [4 x i16]. 446*0fca6ea1SDimitry Andric /// 447*0fca6ea1SDimitry Andric /// Positive differences greater than 0x7FFF are saturated to 0x7FFF. 448*0fca6ea1SDimitry Andric /// Negative differences less than 0x8000 are saturated to 0x8000. 4490b57cec5SDimitry Andric /// 4500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4510b57cec5SDimitry Andric /// 4520b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PHSUBSW instruction. 4530b57cec5SDimitry Andric /// 4540b57cec5SDimitry Andric /// \param __a 4550b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. 
The 4560b57cec5SDimitry Andric /// horizontal differences between the values are stored in the lower bits of 4570b57cec5SDimitry Andric /// the destination. 4580b57cec5SDimitry Andric /// \param __b 4590b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. The 4600b57cec5SDimitry Andric /// horizontal differences between the values are stored in the upper bits of 4610b57cec5SDimitry Andric /// the destination. 4620b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 4630b57cec5SDimitry Andric /// differences of both operands. 4640b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 4650b57cec5SDimitry Andric _mm_hsubs_pi16(__m64 __a, __m64 __b) 4660b57cec5SDimitry Andric { 4670b57cec5SDimitry Andric return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric /// Multiplies corresponding pairs of packed 8-bit unsigned integer 4710b57cec5SDimitry Andric /// values contained in the first source operand and packed 8-bit signed 4720b57cec5SDimitry Andric /// integer values contained in the second source operand, adds pairs of 4730b57cec5SDimitry Andric /// contiguous products with signed saturation, and writes the 16-bit sums to 4740b57cec5SDimitry Andric /// the corresponding bits in the destination. 4750b57cec5SDimitry Andric /// 4760b57cec5SDimitry Andric /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 4770b57cec5SDimitry Andric /// both operands are multiplied, and the sum of both results is written to 4780b57cec5SDimitry Andric /// bits [15:0] of the destination. 4790b57cec5SDimitry Andric /// 4800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4810b57cec5SDimitry Andric /// 4820b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPMADDUBSW instruction. 
4830b57cec5SDimitry Andric /// 4840b57cec5SDimitry Andric /// \param __a 4850b57cec5SDimitry Andric /// A 128-bit integer vector containing the first source operand. 4860b57cec5SDimitry Andric /// \param __b 4870b57cec5SDimitry Andric /// A 128-bit integer vector containing the second source operand. 4880b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the sums of products of both 4890b57cec5SDimitry Andric /// operands: \n 4900b57cec5SDimitry Andric /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 4910b57cec5SDimitry Andric /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 4920b57cec5SDimitry Andric /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 4930b57cec5SDimitry Andric /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n 4940b57cec5SDimitry Andric /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n 4950b57cec5SDimitry Andric /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n 4960b57cec5SDimitry Andric /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n 4970b57cec5SDimitry Andric /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15) 4980b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 4990b57cec5SDimitry Andric _mm_maddubs_epi16(__m128i __a, __m128i __b) 5000b57cec5SDimitry Andric { 5010b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); 5020b57cec5SDimitry Andric } 5030b57cec5SDimitry Andric 5040b57cec5SDimitry Andric /// Multiplies corresponding pairs of packed 8-bit unsigned integer 5050b57cec5SDimitry Andric /// values contained in the first source operand and packed 8-bit signed 5060b57cec5SDimitry Andric /// integer values contained in the second source operand, adds pairs of 5070b57cec5SDimitry Andric /// contiguous products with signed saturation, and writes the 16-bit sums to 5080b57cec5SDimitry Andric /// the corresponding bits in the destination. 
5090b57cec5SDimitry Andric /// 5100b57cec5SDimitry Andric /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 5110b57cec5SDimitry Andric /// both operands are multiplied, and the sum of both results is written to 5120b57cec5SDimitry Andric /// bits [15:0] of the destination. 5130b57cec5SDimitry Andric /// 5140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5150b57cec5SDimitry Andric /// 5160b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PMADDUBSW instruction. 5170b57cec5SDimitry Andric /// 5180b57cec5SDimitry Andric /// \param __a 5190b57cec5SDimitry Andric /// A 64-bit integer vector containing the first source operand. 5200b57cec5SDimitry Andric /// \param __b 5210b57cec5SDimitry Andric /// A 64-bit integer vector containing the second source operand. 5220b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the sums of products of both 5230b57cec5SDimitry Andric /// operands: \n 5240b57cec5SDimitry Andric /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 5250b57cec5SDimitry Andric /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 5260b57cec5SDimitry Andric /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 5270b57cec5SDimitry Andric /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) 5280b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 5290b57cec5SDimitry Andric _mm_maddubs_pi16(__m64 __a, __m64 __b) 5300b57cec5SDimitry Andric { 5310b57cec5SDimitry Andric return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); 5320b57cec5SDimitry Andric } 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric /// Multiplies packed 16-bit signed integer values, truncates the 32-bit 5350b57cec5SDimitry Andric /// products to the 18 most significant bits by right-shifting, rounds the 5360b57cec5SDimitry Andric /// truncated value by adding 1, and writes bits [16:1] to the destination. 
5370b57cec5SDimitry Andric /// 5380b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5390b57cec5SDimitry Andric /// 5400b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPMULHRSW instruction. 5410b57cec5SDimitry Andric /// 5420b57cec5SDimitry Andric /// \param __a 5430b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. 5440b57cec5SDimitry Andric /// \param __b 5450b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16] containing one of the source operands. 5460b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled 5470b57cec5SDimitry Andric /// products of both operands. 5480b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 5490b57cec5SDimitry Andric _mm_mulhrs_epi16(__m128i __a, __m128i __b) 5500b57cec5SDimitry Andric { 5510b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); 5520b57cec5SDimitry Andric } 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric /// Multiplies packed 16-bit signed integer values, truncates the 32-bit 5550b57cec5SDimitry Andric /// products to the 18 most significant bits by right-shifting, rounds the 5560b57cec5SDimitry Andric /// truncated value by adding 1, and writes bits [16:1] to the destination. 5570b57cec5SDimitry Andric /// 5580b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5590b57cec5SDimitry Andric /// 5600b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PMULHRSW instruction. 5610b57cec5SDimitry Andric /// 5620b57cec5SDimitry Andric /// \param __a 5630b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. 5640b57cec5SDimitry Andric /// \param __b 5650b57cec5SDimitry Andric /// A 64-bit vector of [4 x i16] containing one of the source operands. 5660b57cec5SDimitry Andric /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled 5670b57cec5SDimitry Andric /// products of both operands. 
5680b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 5690b57cec5SDimitry Andric _mm_mulhrs_pi16(__m64 __a, __m64 __b) 5700b57cec5SDimitry Andric { 5710b57cec5SDimitry Andric return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); 5720b57cec5SDimitry Andric } 5730b57cec5SDimitry Andric 5740b57cec5SDimitry Andric /// Copies the 8-bit integers from a 128-bit integer vector to the 5750b57cec5SDimitry Andric /// destination or clears 8-bit values in the destination, as specified by 5760b57cec5SDimitry Andric /// the second source operand. 5770b57cec5SDimitry Andric /// 5780b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5790b57cec5SDimitry Andric /// 5800b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPSHUFB instruction. 5810b57cec5SDimitry Andric /// 5820b57cec5SDimitry Andric /// \param __a 5830b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be copied. 5840b57cec5SDimitry Andric /// \param __b 5850b57cec5SDimitry Andric /// A 128-bit integer vector containing control bytes corresponding to 5860b57cec5SDimitry Andric /// positions in the destination: 5870b57cec5SDimitry Andric /// Bit 7: \n 5880b57cec5SDimitry Andric /// 1: Clear the corresponding byte in the destination. \n 5890b57cec5SDimitry Andric /// 0: Copy the selected source byte to the corresponding byte in the 5900b57cec5SDimitry Andric /// destination. \n 5910b57cec5SDimitry Andric /// Bits [6:4] Reserved. \n 5920b57cec5SDimitry Andric /// Bits [3:0] select the source byte to be copied. 5930b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the copied or cleared values. 
5940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 5950b57cec5SDimitry Andric _mm_shuffle_epi8(__m128i __a, __m128i __b) 5960b57cec5SDimitry Andric { 5970b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric 6000b57cec5SDimitry Andric /// Copies the 8-bit integers from a 64-bit integer vector to the 6010b57cec5SDimitry Andric /// destination or clears 8-bit values in the destination, as specified by 6020b57cec5SDimitry Andric /// the second source operand. 6030b57cec5SDimitry Andric /// 6040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6050b57cec5SDimitry Andric /// 6060b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PSHUFB instruction. 6070b57cec5SDimitry Andric /// 6080b57cec5SDimitry Andric /// \param __a 6090b57cec5SDimitry Andric /// A 64-bit integer vector containing the values to be copied. 6100b57cec5SDimitry Andric /// \param __b 6110b57cec5SDimitry Andric /// A 64-bit integer vector containing control bytes corresponding to 6120b57cec5SDimitry Andric /// positions in the destination: 6130b57cec5SDimitry Andric /// Bit 7: \n 6140b57cec5SDimitry Andric /// 1: Clear the corresponding byte in the destination. \n 6150b57cec5SDimitry Andric /// 0: Copy the selected source byte to the corresponding byte in the 6160b57cec5SDimitry Andric /// destination. \n 6170b57cec5SDimitry Andric /// Bits [3:0] select the source byte to be copied. 6180b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the copied or cleared values. 
6190b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 6200b57cec5SDimitry Andric _mm_shuffle_pi8(__m64 __a, __m64 __b) 6210b57cec5SDimitry Andric { 6220b57cec5SDimitry Andric return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); 6230b57cec5SDimitry Andric } 6240b57cec5SDimitry Andric 6250b57cec5SDimitry Andric /// For each 8-bit integer in the first source operand, perform one of 6260b57cec5SDimitry Andric /// the following actions as specified by the second source operand. 6270b57cec5SDimitry Andric /// 6280b57cec5SDimitry Andric /// If the byte in the second source is negative, calculate the two's 6290b57cec5SDimitry Andric /// complement of the corresponding byte in the first source, and write that 6300b57cec5SDimitry Andric /// value to the destination. If the byte in the second source is positive, 6310b57cec5SDimitry Andric /// copy the corresponding byte from the first source to the destination. If 6320b57cec5SDimitry Andric /// the byte in the second source is zero, clear the corresponding byte in 6330b57cec5SDimitry Andric /// the destination. 6340b57cec5SDimitry Andric /// 6350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6360b57cec5SDimitry Andric /// 6370b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPSIGNB instruction. 6380b57cec5SDimitry Andric /// 6390b57cec5SDimitry Andric /// \param __a 6400b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be copied. 6410b57cec5SDimitry Andric /// \param __b 6420b57cec5SDimitry Andric /// A 128-bit integer vector containing control bytes corresponding to 6430b57cec5SDimitry Andric /// positions in the destination. 6440b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the resultant values. 
6450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 6460b57cec5SDimitry Andric _mm_sign_epi8(__m128i __a, __m128i __b) 6470b57cec5SDimitry Andric { 6480b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); 6490b57cec5SDimitry Andric } 6500b57cec5SDimitry Andric 6510b57cec5SDimitry Andric /// For each 16-bit integer in the first source operand, perform one of 6520b57cec5SDimitry Andric /// the following actions as specified by the second source operand. 6530b57cec5SDimitry Andric /// 6540b57cec5SDimitry Andric /// If the word in the second source is negative, calculate the two's 6550b57cec5SDimitry Andric /// complement of the corresponding word in the first source, and write that 6560b57cec5SDimitry Andric /// value to the destination. If the word in the second source is positive, 6570b57cec5SDimitry Andric /// copy the corresponding word from the first source to the destination. If 6580b57cec5SDimitry Andric /// the word in the second source is zero, clear the corresponding word in 6590b57cec5SDimitry Andric /// the destination. 6600b57cec5SDimitry Andric /// 6610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6620b57cec5SDimitry Andric /// 6630b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPSIGNW instruction. 6640b57cec5SDimitry Andric /// 6650b57cec5SDimitry Andric /// \param __a 6660b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be copied. 6670b57cec5SDimitry Andric /// \param __b 6680b57cec5SDimitry Andric /// A 128-bit integer vector containing control words corresponding to 6690b57cec5SDimitry Andric /// positions in the destination. 6700b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the resultant values. 
6710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 6720b57cec5SDimitry Andric _mm_sign_epi16(__m128i __a, __m128i __b) 6730b57cec5SDimitry Andric { 6740b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); 6750b57cec5SDimitry Andric } 6760b57cec5SDimitry Andric 6770b57cec5SDimitry Andric /// For each 32-bit integer in the first source operand, perform one of 6780b57cec5SDimitry Andric /// the following actions as specified by the second source operand. 6790b57cec5SDimitry Andric /// 6800b57cec5SDimitry Andric /// If the doubleword in the second source is negative, calculate the two's 6810b57cec5SDimitry Andric /// complement of the corresponding word in the first source, and write that 6820b57cec5SDimitry Andric /// value to the destination. If the doubleword in the second source is 6830b57cec5SDimitry Andric /// positive, copy the corresponding word from the first source to the 6840b57cec5SDimitry Andric /// destination. If the doubleword in the second source is zero, clear the 6850b57cec5SDimitry Andric /// corresponding word in the destination. 6860b57cec5SDimitry Andric /// 6870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6880b57cec5SDimitry Andric /// 6890b57cec5SDimitry Andric /// This intrinsic corresponds to the \c VPSIGND instruction. 6900b57cec5SDimitry Andric /// 6910b57cec5SDimitry Andric /// \param __a 6920b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be copied. 6930b57cec5SDimitry Andric /// \param __b 6940b57cec5SDimitry Andric /// A 128-bit integer vector containing control doublewords corresponding to 6950b57cec5SDimitry Andric /// positions in the destination. 6960b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the resultant values. 
6970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 6980b57cec5SDimitry Andric _mm_sign_epi32(__m128i __a, __m128i __b) 6990b57cec5SDimitry Andric { 7000b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); 7010b57cec5SDimitry Andric } 7020b57cec5SDimitry Andric 7030b57cec5SDimitry Andric /// For each 8-bit integer in the first source operand, perform one of 7040b57cec5SDimitry Andric /// the following actions as specified by the second source operand. 7050b57cec5SDimitry Andric /// 7060b57cec5SDimitry Andric /// If the byte in the second source is negative, calculate the two's 7070b57cec5SDimitry Andric /// complement of the corresponding byte in the first source, and write that 7080b57cec5SDimitry Andric /// value to the destination. If the byte in the second source is positive, 7090b57cec5SDimitry Andric /// copy the corresponding byte from the first source to the destination. If 7100b57cec5SDimitry Andric /// the byte in the second source is zero, clear the corresponding byte in 7110b57cec5SDimitry Andric /// the destination. 7120b57cec5SDimitry Andric /// 7130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7140b57cec5SDimitry Andric /// 7150b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PSIGNB instruction. 7160b57cec5SDimitry Andric /// 7170b57cec5SDimitry Andric /// \param __a 7180b57cec5SDimitry Andric /// A 64-bit integer vector containing the values to be copied. 7190b57cec5SDimitry Andric /// \param __b 7200b57cec5SDimitry Andric /// A 64-bit integer vector containing control bytes corresponding to 7210b57cec5SDimitry Andric /// positions in the destination. 7220b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the resultant values. 
7230b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 7240b57cec5SDimitry Andric _mm_sign_pi8(__m64 __a, __m64 __b) 7250b57cec5SDimitry Andric { 7260b57cec5SDimitry Andric return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); 7270b57cec5SDimitry Andric } 7280b57cec5SDimitry Andric 7290b57cec5SDimitry Andric /// For each 16-bit integer in the first source operand, perform one of 7300b57cec5SDimitry Andric /// the following actions as specified by the second source operand. 7310b57cec5SDimitry Andric /// 7320b57cec5SDimitry Andric /// If the word in the second source is negative, calculate the two's 7330b57cec5SDimitry Andric /// complement of the corresponding word in the first source, and write that 7340b57cec5SDimitry Andric /// value to the destination. If the word in the second source is positive, 7350b57cec5SDimitry Andric /// copy the corresponding word from the first source to the destination. If 7360b57cec5SDimitry Andric /// the word in the second source is zero, clear the corresponding word in 7370b57cec5SDimitry Andric /// the destination. 7380b57cec5SDimitry Andric /// 7390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7400b57cec5SDimitry Andric /// 7410b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PSIGNW instruction. 7420b57cec5SDimitry Andric /// 7430b57cec5SDimitry Andric /// \param __a 7440b57cec5SDimitry Andric /// A 64-bit integer vector containing the values to be copied. 7450b57cec5SDimitry Andric /// \param __b 7460b57cec5SDimitry Andric /// A 64-bit integer vector containing control words corresponding to 7470b57cec5SDimitry Andric /// positions in the destination. 7480b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the resultant values. 
7490b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 7500b57cec5SDimitry Andric _mm_sign_pi16(__m64 __a, __m64 __b) 7510b57cec5SDimitry Andric { 7520b57cec5SDimitry Andric return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); 7530b57cec5SDimitry Andric } 7540b57cec5SDimitry Andric 7550b57cec5SDimitry Andric /// For each 32-bit integer in the first source operand, perform one of 7560b57cec5SDimitry Andric /// the following actions as specified by the second source operand. 7570b57cec5SDimitry Andric /// 7580b57cec5SDimitry Andric /// If the doubleword in the second source is negative, calculate the two's 7590b57cec5SDimitry Andric /// complement of the corresponding doubleword in the first source, and 7600b57cec5SDimitry Andric /// write that value to the destination. If the doubleword in the second 7610b57cec5SDimitry Andric /// source is positive, copy the corresponding doubleword from the first 7620b57cec5SDimitry Andric /// source to the destination. If the doubleword in the second source is 7630b57cec5SDimitry Andric /// zero, clear the corresponding doubleword in the destination. 7640b57cec5SDimitry Andric /// 7650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7660b57cec5SDimitry Andric /// 7670b57cec5SDimitry Andric /// This intrinsic corresponds to the \c PSIGND instruction. 7680b57cec5SDimitry Andric /// 7690b57cec5SDimitry Andric /// \param __a 7700b57cec5SDimitry Andric /// A 64-bit integer vector containing the values to be copied. 7710b57cec5SDimitry Andric /// \param __b 7720b57cec5SDimitry Andric /// A 64-bit integer vector containing two control doublewords corresponding 7730b57cec5SDimitry Andric /// to positions in the destination. 7740b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the resultant values. 
7750b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 7760b57cec5SDimitry Andric _mm_sign_pi32(__m64 __a, __m64 __b) 7770b57cec5SDimitry Andric { 7780b57cec5SDimitry Andric return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); 7790b57cec5SDimitry Andric } 7800b57cec5SDimitry Andric 7810b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 7820b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS_MMX 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric #endif /* __TMMINTRIN_H */ 785