10b57cec5SDimitry Andric /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __MMINTRIN_H 110b57cec5SDimitry Andric #define __MMINTRIN_H 120b57cec5SDimitry Andric 13349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__) 14349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture" 15349cc55cSDimitry Andric #endif 16349cc55cSDimitry Andric 170b57cec5SDimitry Andric typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric typedef long long __v1di __attribute__((__vector_size__(8))); 200b57cec5SDimitry Andric typedef int __v2si __attribute__((__vector_size__(8))); 210b57cec5SDimitry Andric typedef short __v4hi __attribute__((__vector_size__(8))); 220b57cec5SDimitry Andric typedef char __v8qi __attribute__((__vector_size__(8))); 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 255f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS \ 265f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \ 275f757f3fSDimitry Andric __min_vector_width__(64))) 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric /// Clears the MMX state by setting the state of the x87 stack registers 300b57cec5SDimitry Andric /// to empty. 
310b57cec5SDimitry Andric /// 320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 330b57cec5SDimitry Andric /// 340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> EMMS </c> instruction. 350b57cec5SDimitry Andric /// 365f757f3fSDimitry Andric static __inline__ void __attribute__((__always_inline__, __nodebug__, 375f757f3fSDimitry Andric __target__("mmx,no-evex512"))) 385f757f3fSDimitry Andric _mm_empty(void) { 390b57cec5SDimitry Andric __builtin_ia32_emms(); 400b57cec5SDimitry Andric } 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric /// Constructs a 64-bit integer vector, setting the lower 32 bits to the 430b57cec5SDimitry Andric /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. 440b57cec5SDimitry Andric /// 450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 460b57cec5SDimitry Andric /// 470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD </c> instruction. 480b57cec5SDimitry Andric /// 490b57cec5SDimitry Andric /// \param __i 500b57cec5SDimitry Andric /// A 32-bit integer value. 510b57cec5SDimitry Andric /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the 520b57cec5SDimitry Andric /// parameter. The upper 32 bits are set to 0. 530b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 540b57cec5SDimitry Andric _mm_cvtsi32_si64(int __i) 550b57cec5SDimitry Andric { 560b57cec5SDimitry Andric return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 600b57cec5SDimitry Andric /// signed integer. 610b57cec5SDimitry Andric /// 620b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 630b57cec5SDimitry Andric /// 640b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD </c> instruction. 
650b57cec5SDimitry Andric /// 660b57cec5SDimitry Andric /// \param __m 670b57cec5SDimitry Andric /// A 64-bit integer vector. 680b57cec5SDimitry Andric /// \returns A 32-bit signed integer value containing the lower 32 bits of the 690b57cec5SDimitry Andric /// parameter. 700b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 710b57cec5SDimitry Andric _mm_cvtsi64_si32(__m64 __m) 720b57cec5SDimitry Andric { 730b57cec5SDimitry Andric return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 740b57cec5SDimitry Andric } 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric /// Casts a 64-bit signed integer value into a 64-bit integer vector. 770b57cec5SDimitry Andric /// 780b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 790b57cec5SDimitry Andric /// 800b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVQ </c> instruction. 810b57cec5SDimitry Andric /// 820b57cec5SDimitry Andric /// \param __i 830b57cec5SDimitry Andric /// A 64-bit signed integer. 840b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the same bitwise pattern as the 850b57cec5SDimitry Andric /// parameter. 860b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 870b57cec5SDimitry Andric _mm_cvtsi64_m64(long long __i) 880b57cec5SDimitry Andric { 890b57cec5SDimitry Andric return (__m64)__i; 900b57cec5SDimitry Andric } 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric /// Casts a 64-bit integer vector into a 64-bit signed integer value. 930b57cec5SDimitry Andric /// 940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 950b57cec5SDimitry Andric /// 960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVQ </c> instruction. 970b57cec5SDimitry Andric /// 980b57cec5SDimitry Andric /// \param __m 990b57cec5SDimitry Andric /// A 64-bit integer vector. 1000b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the same bitwise pattern as the 1010b57cec5SDimitry Andric /// parameter. 
1020b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS 1030b57cec5SDimitry Andric _mm_cvtm64_si64(__m64 __m) 1040b57cec5SDimitry Andric { 1050b57cec5SDimitry Andric return (long long)__m; 1060b57cec5SDimitry Andric } 1070b57cec5SDimitry Andric 108*0fca6ea1SDimitry Andric /// Converts, with saturation, 16-bit signed integers from both 64-bit integer 109*0fca6ea1SDimitry Andric /// vector parameters of [4 x i16] into 8-bit signed integer values, and 110*0fca6ea1SDimitry Andric /// constructs a 64-bit integer vector of [8 x i8] as the result. 111*0fca6ea1SDimitry Andric /// 112*0fca6ea1SDimitry Andric /// Positive values greater than 0x7F are saturated to 0x7F. Negative values 113*0fca6ea1SDimitry Andric /// less than 0x80 are saturated to 0x80. 1140b57cec5SDimitry Andric /// 1150b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1160b57cec5SDimitry Andric /// 1170b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. 1180b57cec5SDimitry Andric /// 1190b57cec5SDimitry Andric /// \param __m1 120*0fca6ea1SDimitry Andric /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 121*0fca6ea1SDimitry Andric /// written to the lower 32 bits of the result. 1220b57cec5SDimitry Andric /// \param __m2 123*0fca6ea1SDimitry Andric /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 124*0fca6ea1SDimitry Andric /// written to the upper 32 bits of the result. 1250b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the converted 1260b57cec5SDimitry Andric /// values. 
1270b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 1280b57cec5SDimitry Andric _mm_packs_pi16(__m64 __m1, __m64 __m2) 1290b57cec5SDimitry Andric { 1300b57cec5SDimitry Andric return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 1310b57cec5SDimitry Andric } 1320b57cec5SDimitry Andric 133*0fca6ea1SDimitry Andric /// Converts, with saturation, 32-bit signed integers from both 64-bit integer 134*0fca6ea1SDimitry Andric /// vector parameters of [2 x i32] into 16-bit signed integer values, and 135*0fca6ea1SDimitry Andric /// constructs a 64-bit integer vector of [4 x i16] as the result. 136*0fca6ea1SDimitry Andric /// 137*0fca6ea1SDimitry Andric /// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative 138*0fca6ea1SDimitry Andric /// values less than 0x8000 are saturated to 0x8000. 1390b57cec5SDimitry Andric /// 1400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1410b57cec5SDimitry Andric /// 1420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. 1430b57cec5SDimitry Andric /// 1440b57cec5SDimitry Andric /// \param __m1 145*0fca6ea1SDimitry Andric /// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are 146*0fca6ea1SDimitry Andric /// written to the lower 32 bits of the result. 1470b57cec5SDimitry Andric /// \param __m2 148*0fca6ea1SDimitry Andric /// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are 149*0fca6ea1SDimitry Andric /// written to the upper 32 bits of the result. 1500b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the converted 1510b57cec5SDimitry Andric /// values. 
1520b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 1530b57cec5SDimitry Andric _mm_packs_pi32(__m64 __m1, __m64 __m2) 1540b57cec5SDimitry Andric { 1550b57cec5SDimitry Andric return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 1560b57cec5SDimitry Andric } 1570b57cec5SDimitry Andric 158*0fca6ea1SDimitry Andric /// Converts, with saturation, 16-bit signed integers from both 64-bit integer 159*0fca6ea1SDimitry Andric /// vector parameters of [4 x i16] into 8-bit unsigned integer values, and 160*0fca6ea1SDimitry Andric /// constructs a 64-bit integer vector of [8 x i8] as the result. 161*0fca6ea1SDimitry Andric /// 162*0fca6ea1SDimitry Andric /// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are 163*0fca6ea1SDimitry Andric /// saturated to 0. 1640b57cec5SDimitry Andric /// 1650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1660b57cec5SDimitry Andric /// 1670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. 1680b57cec5SDimitry Andric /// 1690b57cec5SDimitry Andric /// \param __m1 170*0fca6ea1SDimitry Andric /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 171*0fca6ea1SDimitry Andric /// written to the lower 32 bits of the result. 1720b57cec5SDimitry Andric /// \param __m2 173*0fca6ea1SDimitry Andric /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 174*0fca6ea1SDimitry Andric /// written to the upper 32 bits of the result. 1750b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the converted 1760b57cec5SDimitry Andric /// values. 
1770b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 1780b57cec5SDimitry Andric _mm_packs_pu16(__m64 __m1, __m64 __m2) 1790b57cec5SDimitry Andric { 1800b57cec5SDimitry Andric return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 1810b57cec5SDimitry Andric } 1820b57cec5SDimitry Andric 1830b57cec5SDimitry Andric /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 1840b57cec5SDimitry Andric /// and interleaves them into a 64-bit integer vector of [8 x i8]. 1850b57cec5SDimitry Andric /// 1860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1870b57cec5SDimitry Andric /// 1880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. 1890b57cec5SDimitry Andric /// 1900b57cec5SDimitry Andric /// \param __m1 1910b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. \n 1920b57cec5SDimitry Andric /// Bits [39:32] are written to bits [7:0] of the result. \n 1930b57cec5SDimitry Andric /// Bits [47:40] are written to bits [23:16] of the result. \n 1940b57cec5SDimitry Andric /// Bits [55:48] are written to bits [39:32] of the result. \n 1950b57cec5SDimitry Andric /// Bits [63:56] are written to bits [55:48] of the result. 1960b57cec5SDimitry Andric /// \param __m2 1970b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 1980b57cec5SDimitry Andric /// Bits [39:32] are written to bits [15:8] of the result. \n 1990b57cec5SDimitry Andric /// Bits [47:40] are written to bits [31:24] of the result. \n 2000b57cec5SDimitry Andric /// Bits [55:48] are written to bits [47:40] of the result. \n 2010b57cec5SDimitry Andric /// Bits [63:56] are written to bits [63:56] of the result. 2020b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 2030b57cec5SDimitry Andric /// values. 
2040b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 2050b57cec5SDimitry Andric _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 2060b57cec5SDimitry Andric { 2070b57cec5SDimitry Andric return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 2080b57cec5SDimitry Andric } 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric /// Unpacks the upper 32 bits from two 64-bit integer vectors of 2110b57cec5SDimitry Andric /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 2120b57cec5SDimitry Andric /// 2130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2140b57cec5SDimitry Andric /// 2150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. 2160b57cec5SDimitry Andric /// 2170b57cec5SDimitry Andric /// \param __m1 2180b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 2190b57cec5SDimitry Andric /// Bits [47:32] are written to bits [15:0] of the result. \n 2200b57cec5SDimitry Andric /// Bits [63:48] are written to bits [47:32] of the result. 2210b57cec5SDimitry Andric /// \param __m2 2220b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 2230b57cec5SDimitry Andric /// Bits [47:32] are written to bits [31:16] of the result. \n 2240b57cec5SDimitry Andric /// Bits [63:48] are written to bits [63:48] of the result. 2250b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 2260b57cec5SDimitry Andric /// values. 2270b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 2280b57cec5SDimitry Andric _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 2290b57cec5SDimitry Andric { 2300b57cec5SDimitry Andric return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 2310b57cec5SDimitry Andric } 2320b57cec5SDimitry Andric 2330b57cec5SDimitry Andric /// Unpacks the upper 32 bits from two 64-bit integer vectors of 2340b57cec5SDimitry Andric /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 
2350b57cec5SDimitry Andric /// 2360b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2370b57cec5SDimitry Andric /// 2380b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. 2390b57cec5SDimitry Andric /// 2400b57cec5SDimitry Andric /// \param __m1 2410b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 2420b57cec5SDimitry Andric /// the lower 32 bits of the result. 2430b57cec5SDimitry Andric /// \param __m2 2440b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 2450b57cec5SDimitry Andric /// the upper 32 bits of the result. 2460b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 2470b57cec5SDimitry Andric /// values. 2480b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 2490b57cec5SDimitry Andric _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 2500b57cec5SDimitry Andric { 2510b57cec5SDimitry Andric return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 2520b57cec5SDimitry Andric } 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 2550b57cec5SDimitry Andric /// and interleaves them into a 64-bit integer vector of [8 x i8]. 2560b57cec5SDimitry Andric /// 2570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2580b57cec5SDimitry Andric /// 2590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. 2600b57cec5SDimitry Andric /// 2610b57cec5SDimitry Andric /// \param __m1 2620b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 2630b57cec5SDimitry Andric /// Bits [7:0] are written to bits [7:0] of the result. \n 2640b57cec5SDimitry Andric /// Bits [15:8] are written to bits [23:16] of the result. \n 2650b57cec5SDimitry Andric /// Bits [23:16] are written to bits [39:32] of the result. 
\n 2660b57cec5SDimitry Andric /// Bits [31:24] are written to bits [55:48] of the result. 2670b57cec5SDimitry Andric /// \param __m2 2680b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 2690b57cec5SDimitry Andric /// Bits [7:0] are written to bits [15:8] of the result. \n 2700b57cec5SDimitry Andric /// Bits [15:8] are written to bits [31:24] of the result. \n 2710b57cec5SDimitry Andric /// Bits [23:16] are written to bits [47:40] of the result. \n 2720b57cec5SDimitry Andric /// Bits [31:24] are written to bits [63:56] of the result. 2730b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 2740b57cec5SDimitry Andric /// values. 2750b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 2760b57cec5SDimitry Andric _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 2770b57cec5SDimitry Andric { 2780b57cec5SDimitry Andric return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 2790b57cec5SDimitry Andric } 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric /// Unpacks the lower 32 bits from two 64-bit integer vectors of 2820b57cec5SDimitry Andric /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 2830b57cec5SDimitry Andric /// 2840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2850b57cec5SDimitry Andric /// 2860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. 2870b57cec5SDimitry Andric /// 2880b57cec5SDimitry Andric /// \param __m1 2890b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 2900b57cec5SDimitry Andric /// Bits [15:0] are written to bits [15:0] of the result. \n 2910b57cec5SDimitry Andric /// Bits [31:16] are written to bits [47:32] of the result. 2920b57cec5SDimitry Andric /// \param __m2 2930b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 2940b57cec5SDimitry Andric /// Bits [15:0] are written to bits [31:16] of the result. 
\n 2950b57cec5SDimitry Andric /// Bits [31:16] are written to bits [63:48] of the result. 2960b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 2970b57cec5SDimitry Andric /// values. 2980b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 2990b57cec5SDimitry Andric _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 3000b57cec5SDimitry Andric { 3010b57cec5SDimitry Andric return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 3020b57cec5SDimitry Andric } 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric /// Unpacks the lower 32 bits from two 64-bit integer vectors of 3050b57cec5SDimitry Andric /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 3060b57cec5SDimitry Andric /// 3070b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3080b57cec5SDimitry Andric /// 3090b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. 3100b57cec5SDimitry Andric /// 3110b57cec5SDimitry Andric /// \param __m1 3120b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 3130b57cec5SDimitry Andric /// the lower 32 bits of the result. 3140b57cec5SDimitry Andric /// \param __m2 3150b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 3160b57cec5SDimitry Andric /// the upper 32 bits of the result. 3170b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 3180b57cec5SDimitry Andric /// values. 
3190b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 3200b57cec5SDimitry Andric _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 3210b57cec5SDimitry Andric { 3220b57cec5SDimitry Andric return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 3230b57cec5SDimitry Andric } 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric /// Adds each 8-bit integer element of the first 64-bit integer vector 3260b57cec5SDimitry Andric /// of [8 x i8] to the corresponding 8-bit integer element of the second 3270b57cec5SDimitry Andric /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 3280b57cec5SDimitry Andric /// packed into a 64-bit integer vector of [8 x i8]. 3290b57cec5SDimitry Andric /// 3300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3310b57cec5SDimitry Andric /// 3320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDB </c> instruction. 3330b57cec5SDimitry Andric /// 3340b57cec5SDimitry Andric /// \param __m1 3350b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 3360b57cec5SDimitry Andric /// \param __m2 3370b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 3380b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 3390b57cec5SDimitry Andric /// parameters. 3400b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 3410b57cec5SDimitry Andric _mm_add_pi8(__m64 __m1, __m64 __m2) 3420b57cec5SDimitry Andric { 3430b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 3440b57cec5SDimitry Andric } 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric /// Adds each 16-bit integer element of the first 64-bit integer vector 3470b57cec5SDimitry Andric /// of [4 x i16] to the corresponding 16-bit integer element of the second 3480b57cec5SDimitry Andric /// 64-bit integer vector of [4 x i16]. 
The lower 16 bits of the results are 3490b57cec5SDimitry Andric /// packed into a 64-bit integer vector of [4 x i16]. 3500b57cec5SDimitry Andric /// 3510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3520b57cec5SDimitry Andric /// 3530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDW </c> instruction. 3540b57cec5SDimitry Andric /// 3550b57cec5SDimitry Andric /// \param __m1 3560b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 3570b57cec5SDimitry Andric /// \param __m2 3580b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 3590b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 3600b57cec5SDimitry Andric /// parameters. 3610b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 3620b57cec5SDimitry Andric _mm_add_pi16(__m64 __m1, __m64 __m2) 3630b57cec5SDimitry Andric { 3640b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 3650b57cec5SDimitry Andric } 3660b57cec5SDimitry Andric 3670b57cec5SDimitry Andric /// Adds each 32-bit integer element of the first 64-bit integer vector 3680b57cec5SDimitry Andric /// of [2 x i32] to the corresponding 32-bit integer element of the second 3690b57cec5SDimitry Andric /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are 3700b57cec5SDimitry Andric /// packed into a 64-bit integer vector of [2 x i32]. 3710b57cec5SDimitry Andric /// 3720b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3730b57cec5SDimitry Andric /// 3740b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDD </c> instruction. 3750b57cec5SDimitry Andric /// 3760b57cec5SDimitry Andric /// \param __m1 3770b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 3780b57cec5SDimitry Andric /// \param __m2 3790b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 
3800b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 3810b57cec5SDimitry Andric /// parameters. 3820b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 3830b57cec5SDimitry Andric _mm_add_pi32(__m64 __m1, __m64 __m2) 3840b57cec5SDimitry Andric { 3850b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 3860b57cec5SDimitry Andric } 3870b57cec5SDimitry Andric 388*0fca6ea1SDimitry Andric /// Adds, with saturation, each 8-bit signed integer element of the first 389*0fca6ea1SDimitry Andric /// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed 390*0fca6ea1SDimitry Andric /// integer element of the second 64-bit integer vector of [8 x i8]. 391*0fca6ea1SDimitry Andric /// 392*0fca6ea1SDimitry Andric /// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums 393*0fca6ea1SDimitry Andric /// less than 0x80 are saturated to 0x80. The results are packed into a 394*0fca6ea1SDimitry Andric /// 64-bit integer vector of [8 x i8]. 3950b57cec5SDimitry Andric /// 3960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3970b57cec5SDimitry Andric /// 3980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDSB </c> instruction. 3990b57cec5SDimitry Andric /// 4000b57cec5SDimitry Andric /// \param __m1 4010b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 4020b57cec5SDimitry Andric /// \param __m2 4030b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 4040b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 4050b57cec5SDimitry Andric /// of both parameters. 
4060b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 4070b57cec5SDimitry Andric _mm_adds_pi8(__m64 __m1, __m64 __m2) 4080b57cec5SDimitry Andric { 4090b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 4100b57cec5SDimitry Andric } 4110b57cec5SDimitry Andric 412*0fca6ea1SDimitry Andric /// Adds, with saturation, each 16-bit signed integer element of the first 413*0fca6ea1SDimitry Andric /// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed 414*0fca6ea1SDimitry Andric /// integer element of the second 64-bit integer vector of [4 x i16]. 415*0fca6ea1SDimitry Andric /// 416*0fca6ea1SDimitry Andric /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums 417*0fca6ea1SDimitry Andric /// less than 0x8000 are saturated to 0x8000. The results are packed into a 418*0fca6ea1SDimitry Andric /// 64-bit integer vector of [4 x i16]. 4190b57cec5SDimitry Andric /// 4200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4210b57cec5SDimitry Andric /// 4220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDSW </c> instruction. 4230b57cec5SDimitry Andric /// 4240b57cec5SDimitry Andric /// \param __m1 4250b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 4260b57cec5SDimitry Andric /// \param __m2 4270b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 4280b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 4290b57cec5SDimitry Andric /// of both parameters. 
4300b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 4310b57cec5SDimitry Andric _mm_adds_pi16(__m64 __m1, __m64 __m2) 4320b57cec5SDimitry Andric { 4330b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric 436*0fca6ea1SDimitry Andric /// Adds, with saturation, each 8-bit unsigned integer element of the first 437*0fca6ea1SDimitry Andric /// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned 438*0fca6ea1SDimitry Andric /// integer element of the second 64-bit integer vector of [8 x i8]. 439*0fca6ea1SDimitry Andric /// 440*0fca6ea1SDimitry Andric /// Sums greater than 0xFF are saturated to 0xFF. The results are packed 441*0fca6ea1SDimitry Andric /// into a 64-bit integer vector of [8 x i8]. 4420b57cec5SDimitry Andric /// 4430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4440b57cec5SDimitry Andric /// 4450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDUSB </c> instruction. 4460b57cec5SDimitry Andric /// 4470b57cec5SDimitry Andric /// \param __m1 4480b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 4490b57cec5SDimitry Andric /// \param __m2 4500b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 4510b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 4520b57cec5SDimitry Andric /// unsigned sums of both parameters. 
4530b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 4540b57cec5SDimitry Andric _mm_adds_pu8(__m64 __m1, __m64 __m2) 4550b57cec5SDimitry Andric { 4560b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 4570b57cec5SDimitry Andric } 4580b57cec5SDimitry Andric 459*0fca6ea1SDimitry Andric /// Adds, with saturation, each 16-bit unsigned integer element of the first 460*0fca6ea1SDimitry Andric /// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned 461*0fca6ea1SDimitry Andric /// integer element of the second 64-bit integer vector of [4 x i16]. 462*0fca6ea1SDimitry Andric /// 463*0fca6ea1SDimitry Andric /// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed 464*0fca6ea1SDimitry Andric /// into a 64-bit integer vector of [4 x i16]. 4650b57cec5SDimitry Andric /// 4660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4670b57cec5SDimitry Andric /// 4680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDUSW </c> instruction. 4690b57cec5SDimitry Andric /// 4700b57cec5SDimitry Andric /// \param __m1 4710b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 4720b57cec5SDimitry Andric /// \param __m2 4730b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 4740b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 4750b57cec5SDimitry Andric /// unsigned sums of both parameters. 
4760b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 4770b57cec5SDimitry Andric _mm_adds_pu16(__m64 __m1, __m64 __m2) 4780b57cec5SDimitry Andric { 4790b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric /// Subtracts each 8-bit integer element of the second 64-bit integer 4830b57cec5SDimitry Andric /// vector of [8 x i8] from the corresponding 8-bit integer element of the 4840b57cec5SDimitry Andric /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 4850b57cec5SDimitry Andric /// are packed into a 64-bit integer vector of [8 x i8]. 4860b57cec5SDimitry Andric /// 4870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4880b57cec5SDimitry Andric /// 4890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBB </c> instruction. 4900b57cec5SDimitry Andric /// 4910b57cec5SDimitry Andric /// \param __m1 4920b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8] containing the minuends. 4930b57cec5SDimitry Andric /// \param __m2 4940b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 4950b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the differences of 4960b57cec5SDimitry Andric /// both parameters. 4970b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 4980b57cec5SDimitry Andric _mm_sub_pi8(__m64 __m1, __m64 __m2) 4990b57cec5SDimitry Andric { 5000b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric /// Subtracts each 16-bit integer element of the second 64-bit integer 5040b57cec5SDimitry Andric /// vector of [4 x i16] from the corresponding 16-bit integer element of the 5050b57cec5SDimitry Andric /// first 64-bit integer vector of [4 x i16]. 
The lower 16 bits of the 5060b57cec5SDimitry Andric /// results are packed into a 64-bit integer vector of [4 x i16]. 5070b57cec5SDimitry Andric /// 5080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5090b57cec5SDimitry Andric /// 5100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBW </c> instruction. 5110b57cec5SDimitry Andric /// 5120b57cec5SDimitry Andric /// \param __m1 5130b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16] containing the minuends. 5140b57cec5SDimitry Andric /// \param __m2 5150b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 5160b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the differences of 5170b57cec5SDimitry Andric /// both parameters. 5180b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 5190b57cec5SDimitry Andric _mm_sub_pi16(__m64 __m1, __m64 __m2) 5200b57cec5SDimitry Andric { 5210b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 5220b57cec5SDimitry Andric } 5230b57cec5SDimitry Andric 5240b57cec5SDimitry Andric /// Subtracts each 32-bit integer element of the second 64-bit integer 5250b57cec5SDimitry Andric /// vector of [2 x i32] from the corresponding 32-bit integer element of the 5260b57cec5SDimitry Andric /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 5270b57cec5SDimitry Andric /// results are packed into a 64-bit integer vector of [2 x i32]. 5280b57cec5SDimitry Andric /// 5290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5300b57cec5SDimitry Andric /// 5310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBD </c> instruction. 5320b57cec5SDimitry Andric /// 5330b57cec5SDimitry Andric /// \param __m1 5340b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32] containing the minuends. 
5350b57cec5SDimitry Andric /// \param __m2 5360b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32] containing the subtrahends. 5370b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the differences of 5380b57cec5SDimitry Andric /// both parameters. 5390b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 5400b57cec5SDimitry Andric _mm_sub_pi32(__m64 __m1, __m64 __m2) 5410b57cec5SDimitry Andric { 5420b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 5430b57cec5SDimitry Andric } 5440b57cec5SDimitry Andric 545*0fca6ea1SDimitry Andric /// Subtracts, with saturation, each 8-bit signed integer element of the second 546*0fca6ea1SDimitry Andric /// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed 547*0fca6ea1SDimitry Andric /// integer element of the first 64-bit integer vector of [8 x i8]. 548*0fca6ea1SDimitry Andric /// 549*0fca6ea1SDimitry Andric /// Positive results greater than 0x7F are saturated to 0x7F. Negative 550*0fca6ea1SDimitry Andric /// results less than 0x80 are saturated to 0x80. The results are packed 551*0fca6ea1SDimitry Andric /// into a 64-bit integer vector of [8 x i8]. 5520b57cec5SDimitry Andric /// 5530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5540b57cec5SDimitry Andric /// 5550b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBSB </c> instruction. 5560b57cec5SDimitry Andric /// 5570b57cec5SDimitry Andric /// \param __m1 5580b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8] containing the minuends. 5590b57cec5SDimitry Andric /// \param __m2 5600b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 5610b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 5620b57cec5SDimitry Andric /// differences of both parameters. 
5630b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 5640b57cec5SDimitry Andric _mm_subs_pi8(__m64 __m1, __m64 __m2) 5650b57cec5SDimitry Andric { 5660b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric 569*0fca6ea1SDimitry Andric /// Subtracts, with saturation, each 16-bit signed integer element of the 570*0fca6ea1SDimitry Andric /// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit 571*0fca6ea1SDimitry Andric /// signed integer element of the first 64-bit integer vector of [4 x i16]. 572*0fca6ea1SDimitry Andric /// 573*0fca6ea1SDimitry Andric /// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative 574*0fca6ea1SDimitry Andric /// results less than 0x8000 are saturated to 0x8000. The results are packed 575*0fca6ea1SDimitry Andric /// into a 64-bit integer vector of [4 x i16]. 5760b57cec5SDimitry Andric /// 5770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5780b57cec5SDimitry Andric /// 5790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBSW </c> instruction. 5800b57cec5SDimitry Andric /// 5810b57cec5SDimitry Andric /// \param __m1 5820b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16] containing the minuends. 5830b57cec5SDimitry Andric /// \param __m2 5840b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 5850b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 5860b57cec5SDimitry Andric /// differences of both parameters. 
5870b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 5880b57cec5SDimitry Andric _mm_subs_pi16(__m64 __m1, __m64 __m2) 5890b57cec5SDimitry Andric { 5900b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 5910b57cec5SDimitry Andric } 5920b57cec5SDimitry Andric 5930b57cec5SDimitry Andric /// Subtracts each 8-bit unsigned integer element of the second 64-bit 5940b57cec5SDimitry Andric /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 5950b57cec5SDimitry Andric /// element of the first 64-bit integer vector of [8 x i8]. 5960b57cec5SDimitry Andric /// 5970b57cec5SDimitry Andric /// If an element of the first vector is less than the corresponding element 5980b57cec5SDimitry Andric /// of the second vector, the result is saturated to 0. The results are 5990b57cec5SDimitry Andric /// packed into a 64-bit integer vector of [8 x i8]. 6000b57cec5SDimitry Andric /// 6010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6020b57cec5SDimitry Andric /// 6030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. 6040b57cec5SDimitry Andric /// 6050b57cec5SDimitry Andric /// \param __m1 6060b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8] containing the minuends. 6070b57cec5SDimitry Andric /// \param __m2 6080b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 6090b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 6100b57cec5SDimitry Andric /// differences of both parameters. 
6110b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 6120b57cec5SDimitry Andric _mm_subs_pu8(__m64 __m1, __m64 __m2) 6130b57cec5SDimitry Andric { 6140b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 6150b57cec5SDimitry Andric } 6160b57cec5SDimitry Andric 6170b57cec5SDimitry Andric /// Subtracts each 16-bit unsigned integer element of the second 64-bit 6180b57cec5SDimitry Andric /// integer vector of [4 x i16] from the corresponding 16-bit unsigned 6190b57cec5SDimitry Andric /// integer element of the first 64-bit integer vector of [4 x i16]. 6200b57cec5SDimitry Andric /// 6210b57cec5SDimitry Andric /// If an element of the first vector is less than the corresponding element 6220b57cec5SDimitry Andric /// of the second vector, the result is saturated to 0. The results are 6230b57cec5SDimitry Andric /// packed into a 64-bit integer vector of [4 x i16]. 6240b57cec5SDimitry Andric /// 6250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6260b57cec5SDimitry Andric /// 6270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. 6280b57cec5SDimitry Andric /// 6290b57cec5SDimitry Andric /// \param __m1 6300b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16] containing the minuends. 6310b57cec5SDimitry Andric /// \param __m2 6320b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 6330b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 6340b57cec5SDimitry Andric /// differences of both parameters. 
6350b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 6360b57cec5SDimitry Andric _mm_subs_pu16(__m64 __m1, __m64 __m2) 6370b57cec5SDimitry Andric { 6380b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 6390b57cec5SDimitry Andric } 6400b57cec5SDimitry Andric 6410b57cec5SDimitry Andric /// Multiplies each 16-bit signed integer element of the first 64-bit 6420b57cec5SDimitry Andric /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 6430b57cec5SDimitry Andric /// element of the second 64-bit integer vector of [4 x i16] and get four 6440b57cec5SDimitry Andric /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 6450b57cec5SDimitry Andric /// The lower 32 bits of these two sums are packed into a 64-bit integer 6460b57cec5SDimitry Andric /// vector of [2 x i32]. 6470b57cec5SDimitry Andric /// 6480b57cec5SDimitry Andric /// For example, bits [15:0] of both parameters are multiplied, bits [31:16] 6490b57cec5SDimitry Andric /// of both parameters are multiplied, and the sum of both results is written 6500b57cec5SDimitry Andric /// to bits [31:0] of the result. 6510b57cec5SDimitry Andric /// 6520b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6530b57cec5SDimitry Andric /// 6540b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMADDWD </c> instruction. 6550b57cec5SDimitry Andric /// 6560b57cec5SDimitry Andric /// \param __m1 6570b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 6580b57cec5SDimitry Andric /// \param __m2 6590b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 6600b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the sums of 6610b57cec5SDimitry Andric /// products of both parameters. 
6620b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 6630b57cec5SDimitry Andric _mm_madd_pi16(__m64 __m1, __m64 __m2) 6640b57cec5SDimitry Andric { 6650b57cec5SDimitry Andric return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 6660b57cec5SDimitry Andric } 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric /// Multiplies each 16-bit signed integer element of the first 64-bit 6690b57cec5SDimitry Andric /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 6700b57cec5SDimitry Andric /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 6710b57cec5SDimitry Andric /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 6720b57cec5SDimitry Andric /// 6730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6740b57cec5SDimitry Andric /// 6750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULHW </c> instruction. 6760b57cec5SDimitry Andric /// 6770b57cec5SDimitry Andric /// \param __m1 6780b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 6790b57cec5SDimitry Andric /// \param __m2 6800b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 6810b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 6820b57cec5SDimitry Andric /// of the products of both parameters. 6830b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 6840b57cec5SDimitry Andric _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 6850b57cec5SDimitry Andric { 6860b57cec5SDimitry Andric return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 6870b57cec5SDimitry Andric } 6880b57cec5SDimitry Andric 6890b57cec5SDimitry Andric /// Multiplies each 16-bit signed integer element of the first 64-bit 6900b57cec5SDimitry Andric /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 6910b57cec5SDimitry Andric /// element of the second 64-bit integer vector of [4 x i16]. 
Packs the lower 6920b57cec5SDimitry Andric /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 6930b57cec5SDimitry Andric /// 6940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6950b57cec5SDimitry Andric /// 6960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULLW </c> instruction. 6970b57cec5SDimitry Andric /// 6980b57cec5SDimitry Andric /// \param __m1 6990b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 7000b57cec5SDimitry Andric /// \param __m2 7010b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 7020b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 7030b57cec5SDimitry Andric /// of the products of both parameters. 7040b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 7050b57cec5SDimitry Andric _mm_mullo_pi16(__m64 __m1, __m64 __m2) 7060b57cec5SDimitry Andric { 7070b57cec5SDimitry Andric return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 7080b57cec5SDimitry Andric } 7090b57cec5SDimitry Andric 7100b57cec5SDimitry Andric /// Left-shifts each 16-bit signed integer element of the first 7110b57cec5SDimitry Andric /// parameter, which is a 64-bit integer vector of [4 x i16], by the number 7120b57cec5SDimitry Andric /// of bits specified by the second parameter, which is a 64-bit integer. The 7130b57cec5SDimitry Andric /// lower 16 bits of the results are packed into a 64-bit integer vector of 7140b57cec5SDimitry Andric /// [4 x i16]. 7150b57cec5SDimitry Andric /// 7160b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7170b57cec5SDimitry Andric /// 7180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 7190b57cec5SDimitry Andric /// 7200b57cec5SDimitry Andric /// \param __m 7210b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 
7220b57cec5SDimitry Andric /// \param __count 7230b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 7240b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 7250b57cec5SDimitry Andric /// values. If \a __count is greater or equal to 16, the result is set to all 7260b57cec5SDimitry Andric /// 0. 7270b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 7280b57cec5SDimitry Andric _mm_sll_pi16(__m64 __m, __m64 __count) 7290b57cec5SDimitry Andric { 7300b57cec5SDimitry Andric return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 7310b57cec5SDimitry Andric } 7320b57cec5SDimitry Andric 7330b57cec5SDimitry Andric /// Left-shifts each 16-bit signed integer element of a 64-bit integer 7340b57cec5SDimitry Andric /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 7350b57cec5SDimitry Andric /// The lower 16 bits of the results are packed into a 64-bit integer vector 7360b57cec5SDimitry Andric /// of [4 x i16]. 7370b57cec5SDimitry Andric /// 7380b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7390b57cec5SDimitry Andric /// 7400b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 7410b57cec5SDimitry Andric /// 7420b57cec5SDimitry Andric /// \param __m 7430b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 7440b57cec5SDimitry Andric /// \param __count 7450b57cec5SDimitry Andric /// A 32-bit integer value. 7460b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 7470b57cec5SDimitry Andric /// values. If \a __count is greater or equal to 16, the result is set to all 7480b57cec5SDimitry Andric /// 0. 
7490b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 7500b57cec5SDimitry Andric _mm_slli_pi16(__m64 __m, int __count) 7510b57cec5SDimitry Andric { 7520b57cec5SDimitry Andric return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 7530b57cec5SDimitry Andric } 7540b57cec5SDimitry Andric 7550b57cec5SDimitry Andric /// Left-shifts each 32-bit signed integer element of the first 7560b57cec5SDimitry Andric /// parameter, which is a 64-bit integer vector of [2 x i32], by the number 7570b57cec5SDimitry Andric /// of bits specified by the second parameter, which is a 64-bit integer. The 7580b57cec5SDimitry Andric /// lower 32 bits of the results are packed into a 64-bit integer vector of 7590b57cec5SDimitry Andric /// [2 x i32]. 7600b57cec5SDimitry Andric /// 7610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7620b57cec5SDimitry Andric /// 7630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 7640b57cec5SDimitry Andric /// 7650b57cec5SDimitry Andric /// \param __m 7660b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 7670b57cec5SDimitry Andric /// \param __count 7680b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 7690b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 7700b57cec5SDimitry Andric /// values. If \a __count is greater or equal to 32, the result is set to all 7710b57cec5SDimitry Andric /// 0. 7720b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 7730b57cec5SDimitry Andric _mm_sll_pi32(__m64 __m, __m64 __count) 7740b57cec5SDimitry Andric { 7750b57cec5SDimitry Andric return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 7760b57cec5SDimitry Andric } 7770b57cec5SDimitry Andric 7780b57cec5SDimitry Andric /// Left-shifts each 32-bit signed integer element of a 64-bit integer 7790b57cec5SDimitry Andric /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 
7800b57cec5SDimitry Andric /// The lower 32 bits of the results are packed into a 64-bit integer vector 7810b57cec5SDimitry Andric /// of [2 x i32]. 7820b57cec5SDimitry Andric /// 7830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7840b57cec5SDimitry Andric /// 7850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 7860b57cec5SDimitry Andric /// 7870b57cec5SDimitry Andric /// \param __m 7880b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 7890b57cec5SDimitry Andric /// \param __count 7900b57cec5SDimitry Andric /// A 32-bit integer value. 7910b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 7920b57cec5SDimitry Andric /// values. If \a __count is greater or equal to 32, the result is set to all 7930b57cec5SDimitry Andric /// 0. 7940b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 7950b57cec5SDimitry Andric _mm_slli_pi32(__m64 __m, int __count) 7960b57cec5SDimitry Andric { 7970b57cec5SDimitry Andric return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 7980b57cec5SDimitry Andric } 7990b57cec5SDimitry Andric 8000b57cec5SDimitry Andric /// Left-shifts the first 64-bit integer parameter by the number of bits 8010b57cec5SDimitry Andric /// specified by the second 64-bit integer parameter. The lower 64 bits of 8020b57cec5SDimitry Andric /// result are returned. 8030b57cec5SDimitry Andric /// 8040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8050b57cec5SDimitry Andric /// 8060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 8070b57cec5SDimitry Andric /// 8080b57cec5SDimitry Andric /// \param __m 8090b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 8100b57cec5SDimitry Andric /// \param __count 8110b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 
8120b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the left-shifted value. If 8130b57cec5SDimitry Andric /// \a __count is greater or equal to 64, the result is set to 0. 8140b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 8150b57cec5SDimitry Andric _mm_sll_si64(__m64 __m, __m64 __count) 8160b57cec5SDimitry Andric { 8170b57cec5SDimitry Andric return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); 8180b57cec5SDimitry Andric } 8190b57cec5SDimitry Andric 8200b57cec5SDimitry Andric /// Left-shifts the first parameter, which is a 64-bit integer, by the 8210b57cec5SDimitry Andric /// number of bits specified by the second parameter, which is a 32-bit 8220b57cec5SDimitry Andric /// integer. The lower 64 bits of result are returned. 8230b57cec5SDimitry Andric /// 8240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8250b57cec5SDimitry Andric /// 8260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 8270b57cec5SDimitry Andric /// 8280b57cec5SDimitry Andric /// \param __m 8290b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 8300b57cec5SDimitry Andric /// \param __count 8310b57cec5SDimitry Andric /// A 32-bit integer value. 8320b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the left-shifted value. If 8330b57cec5SDimitry Andric /// \a __count is greater or equal to 64, the result is set to 0. 
8340b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 8350b57cec5SDimitry Andric _mm_slli_si64(__m64 __m, int __count) 8360b57cec5SDimitry Andric { 8370b57cec5SDimitry Andric return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); 8380b57cec5SDimitry Andric } 8390b57cec5SDimitry Andric 8400b57cec5SDimitry Andric /// Right-shifts each 16-bit integer element of the first parameter, 8410b57cec5SDimitry Andric /// which is a 64-bit integer vector of [4 x i16], by the number of bits 8420b57cec5SDimitry Andric /// specified by the second parameter, which is a 64-bit integer. 8430b57cec5SDimitry Andric /// 8440b57cec5SDimitry Andric /// High-order bits are filled with the sign bit of the initial value of each 8450b57cec5SDimitry Andric /// 16-bit element. The 16-bit results are packed into a 64-bit integer 8460b57cec5SDimitry Andric /// vector of [4 x i16]. 8470b57cec5SDimitry Andric /// 8480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8490b57cec5SDimitry Andric /// 8500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 8510b57cec5SDimitry Andric /// 8520b57cec5SDimitry Andric /// \param __m 8530b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 8540b57cec5SDimitry Andric /// \param __count 8550b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 8560b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 8570b57cec5SDimitry Andric /// values. 
8580b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 8590b57cec5SDimitry Andric _mm_sra_pi16(__m64 __m, __m64 __count) 8600b57cec5SDimitry Andric { 8610b57cec5SDimitry Andric return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 8620b57cec5SDimitry Andric } 8630b57cec5SDimitry Andric 8640b57cec5SDimitry Andric /// Right-shifts each 16-bit integer element of a 64-bit integer vector 8650b57cec5SDimitry Andric /// of [4 x i16] by the number of bits specified by a 32-bit integer. 8660b57cec5SDimitry Andric /// 8670b57cec5SDimitry Andric /// High-order bits are filled with the sign bit of the initial value of each 8680b57cec5SDimitry Andric /// 16-bit element. The 16-bit results are packed into a 64-bit integer 8690b57cec5SDimitry Andric /// vector of [4 x i16]. 8700b57cec5SDimitry Andric /// 8710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8720b57cec5SDimitry Andric /// 8730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 8740b57cec5SDimitry Andric /// 8750b57cec5SDimitry Andric /// \param __m 8760b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 8770b57cec5SDimitry Andric /// \param __count 8780b57cec5SDimitry Andric /// A 32-bit integer value. 8790b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 8800b57cec5SDimitry Andric /// values. 8810b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 8820b57cec5SDimitry Andric _mm_srai_pi16(__m64 __m, int __count) 8830b57cec5SDimitry Andric { 8840b57cec5SDimitry Andric return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 8850b57cec5SDimitry Andric } 8860b57cec5SDimitry Andric 8870b57cec5SDimitry Andric /// Right-shifts each 32-bit integer element of the first parameter, 8880b57cec5SDimitry Andric /// which is a 64-bit integer vector of [2 x i32], by the number of bits 8890b57cec5SDimitry Andric /// specified by the second parameter, which is a 64-bit integer. 
8900b57cec5SDimitry Andric /// 8910b57cec5SDimitry Andric /// High-order bits are filled with the sign bit of the initial value of each 8920b57cec5SDimitry Andric /// 32-bit element. The 32-bit results are packed into a 64-bit integer 8930b57cec5SDimitry Andric /// vector of [2 x i32]. 8940b57cec5SDimitry Andric /// 8950b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8960b57cec5SDimitry Andric /// 8970b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 8980b57cec5SDimitry Andric /// 8990b57cec5SDimitry Andric /// \param __m 9000b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 9010b57cec5SDimitry Andric /// \param __count 9020b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 9030b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 9040b57cec5SDimitry Andric /// values. 9050b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 9060b57cec5SDimitry Andric _mm_sra_pi32(__m64 __m, __m64 __count) 9070b57cec5SDimitry Andric { 9080b57cec5SDimitry Andric return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 9090b57cec5SDimitry Andric } 9100b57cec5SDimitry Andric 9110b57cec5SDimitry Andric /// Right-shifts each 32-bit integer element of a 64-bit integer vector 9120b57cec5SDimitry Andric /// of [2 x i32] by the number of bits specified by a 32-bit integer. 9130b57cec5SDimitry Andric /// 9140b57cec5SDimitry Andric /// High-order bits are filled with the sign bit of the initial value of each 9150b57cec5SDimitry Andric /// 32-bit element. The 32-bit results are packed into a 64-bit integer 9160b57cec5SDimitry Andric /// vector of [2 x i32]. 9170b57cec5SDimitry Andric /// 9180b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9190b57cec5SDimitry Andric /// 9200b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 
9210b57cec5SDimitry Andric /// 9220b57cec5SDimitry Andric /// \param __m 9230b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 9240b57cec5SDimitry Andric /// \param __count 9250b57cec5SDimitry Andric /// A 32-bit integer value. 9260b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 9270b57cec5SDimitry Andric /// values. 9280b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 9290b57cec5SDimitry Andric _mm_srai_pi32(__m64 __m, int __count) 9300b57cec5SDimitry Andric { 9310b57cec5SDimitry Andric return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 9320b57cec5SDimitry Andric } 9330b57cec5SDimitry Andric 9340b57cec5SDimitry Andric /// Right-shifts each 16-bit integer element of the first parameter, 9350b57cec5SDimitry Andric /// which is a 64-bit integer vector of [4 x i16], by the number of bits 9360b57cec5SDimitry Andric /// specified by the second parameter, which is a 64-bit integer. 9370b57cec5SDimitry Andric /// 9380b57cec5SDimitry Andric /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 9390b57cec5SDimitry Andric /// integer vector of [4 x i16]. 9400b57cec5SDimitry Andric /// 9410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9420b57cec5SDimitry Andric /// 9430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 9440b57cec5SDimitry Andric /// 9450b57cec5SDimitry Andric /// \param __m 9460b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 9470b57cec5SDimitry Andric /// \param __count 9480b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 9490b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 9500b57cec5SDimitry Andric /// values. 
9510b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 9520b57cec5SDimitry Andric _mm_srl_pi16(__m64 __m, __m64 __count) 9530b57cec5SDimitry Andric { 9540b57cec5SDimitry Andric return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 9550b57cec5SDimitry Andric } 9560b57cec5SDimitry Andric 9570b57cec5SDimitry Andric /// Right-shifts each 16-bit integer element of a 64-bit integer vector 9580b57cec5SDimitry Andric /// of [4 x i16] by the number of bits specified by a 32-bit integer. 9590b57cec5SDimitry Andric /// 9600b57cec5SDimitry Andric /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 9610b57cec5SDimitry Andric /// integer vector of [4 x i16]. 9620b57cec5SDimitry Andric /// 9630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9640b57cec5SDimitry Andric /// 9650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 9660b57cec5SDimitry Andric /// 9670b57cec5SDimitry Andric /// \param __m 9680b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 9690b57cec5SDimitry Andric /// \param __count 9700b57cec5SDimitry Andric /// A 32-bit integer value. 9710b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 9720b57cec5SDimitry Andric /// values. 9730b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 9740b57cec5SDimitry Andric _mm_srli_pi16(__m64 __m, int __count) 9750b57cec5SDimitry Andric { 9760b57cec5SDimitry Andric return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 9770b57cec5SDimitry Andric } 9780b57cec5SDimitry Andric 9790b57cec5SDimitry Andric /// Right-shifts each 32-bit integer element of the first parameter, 9800b57cec5SDimitry Andric /// which is a 64-bit integer vector of [2 x i32], by the number of bits 9810b57cec5SDimitry Andric /// specified by the second parameter, which is a 64-bit integer. 9820b57cec5SDimitry Andric /// 9830b57cec5SDimitry Andric /// High-order bits are cleared. 
The 32-bit results are packed into a 64-bit 9840b57cec5SDimitry Andric /// integer vector of [2 x i32]. 9850b57cec5SDimitry Andric /// 9860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9870b57cec5SDimitry Andric /// 9880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 9890b57cec5SDimitry Andric /// 9900b57cec5SDimitry Andric /// \param __m 9910b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 9920b57cec5SDimitry Andric /// \param __count 9930b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 9940b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 9950b57cec5SDimitry Andric /// values. 9960b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 9970b57cec5SDimitry Andric _mm_srl_pi32(__m64 __m, __m64 __count) 9980b57cec5SDimitry Andric { 9990b57cec5SDimitry Andric return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 10000b57cec5SDimitry Andric } 10010b57cec5SDimitry Andric 10020b57cec5SDimitry Andric /// Right-shifts each 32-bit integer element of a 64-bit integer vector 10030b57cec5SDimitry Andric /// of [2 x i32] by the number of bits specified by a 32-bit integer. 10040b57cec5SDimitry Andric /// 10050b57cec5SDimitry Andric /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 10060b57cec5SDimitry Andric /// integer vector of [2 x i32]. 10070b57cec5SDimitry Andric /// 10080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10090b57cec5SDimitry Andric /// 10100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 10110b57cec5SDimitry Andric /// 10120b57cec5SDimitry Andric /// \param __m 10130b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 10140b57cec5SDimitry Andric /// \param __count 10150b57cec5SDimitry Andric /// A 32-bit integer value. 
10160b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 10170b57cec5SDimitry Andric /// values. 10180b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 10190b57cec5SDimitry Andric _mm_srli_pi32(__m64 __m, int __count) 10200b57cec5SDimitry Andric { 10210b57cec5SDimitry Andric return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 10220b57cec5SDimitry Andric } 10230b57cec5SDimitry Andric 10240b57cec5SDimitry Andric /// Right-shifts the first 64-bit integer parameter by the number of bits 10250b57cec5SDimitry Andric /// specified by the second 64-bit integer parameter. 10260b57cec5SDimitry Andric /// 10270b57cec5SDimitry Andric /// High-order bits are cleared. 10280b57cec5SDimitry Andric /// 10290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10300b57cec5SDimitry Andric /// 10310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 10320b57cec5SDimitry Andric /// 10330b57cec5SDimitry Andric /// \param __m 10340b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 10350b57cec5SDimitry Andric /// \param __count 10360b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 10370b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the right-shifted value. 10380b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 10390b57cec5SDimitry Andric _mm_srl_si64(__m64 __m, __m64 __count) 10400b57cec5SDimitry Andric { 10410b57cec5SDimitry Andric return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); 10420b57cec5SDimitry Andric } 10430b57cec5SDimitry Andric 10440b57cec5SDimitry Andric /// Right-shifts the first parameter, which is a 64-bit integer, by the 10450b57cec5SDimitry Andric /// number of bits specified by the second parameter, which is a 32-bit 10460b57cec5SDimitry Andric /// integer. 
10470b57cec5SDimitry Andric /// 10480b57cec5SDimitry Andric /// High-order bits are cleared. 10490b57cec5SDimitry Andric /// 10500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10510b57cec5SDimitry Andric /// 10520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 10530b57cec5SDimitry Andric /// 10540b57cec5SDimitry Andric /// \param __m 10550b57cec5SDimitry Andric /// A 64-bit integer vector interpreted as a single 64-bit integer. 10560b57cec5SDimitry Andric /// \param __count 10570b57cec5SDimitry Andric /// A 32-bit integer value. 10580b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the right-shifted value. 10590b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 10600b57cec5SDimitry Andric _mm_srli_si64(__m64 __m, int __count) 10610b57cec5SDimitry Andric { 10620b57cec5SDimitry Andric return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); 10630b57cec5SDimitry Andric } 10640b57cec5SDimitry Andric 10650b57cec5SDimitry Andric /// Performs a bitwise AND of two 64-bit integer vectors. 10660b57cec5SDimitry Andric /// 10670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10680b57cec5SDimitry Andric /// 10690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PAND </c> instruction. 10700b57cec5SDimitry Andric /// 10710b57cec5SDimitry Andric /// \param __m1 10720b57cec5SDimitry Andric /// A 64-bit integer vector. 10730b57cec5SDimitry Andric /// \param __m2 10740b57cec5SDimitry Andric /// A 64-bit integer vector. 10750b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the bitwise AND of both 10760b57cec5SDimitry Andric /// parameters. 
10770b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 10780b57cec5SDimitry Andric _mm_and_si64(__m64 __m1, __m64 __m2) 10790b57cec5SDimitry Andric { 10800b57cec5SDimitry Andric return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); 10810b57cec5SDimitry Andric } 10820b57cec5SDimitry Andric 10830b57cec5SDimitry Andric /// Performs a bitwise NOT of the first 64-bit integer vector, and then 10840b57cec5SDimitry Andric /// performs a bitwise AND of the intermediate result and the second 64-bit 10850b57cec5SDimitry Andric /// integer vector. 10860b57cec5SDimitry Andric /// 10870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10880b57cec5SDimitry Andric /// 10890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PANDN </c> instruction. 10900b57cec5SDimitry Andric /// 10910b57cec5SDimitry Andric /// \param __m1 10920b57cec5SDimitry Andric /// A 64-bit integer vector. The one's complement of this parameter is used 10930b57cec5SDimitry Andric /// in the bitwise AND. 10940b57cec5SDimitry Andric /// \param __m2 10950b57cec5SDimitry Andric /// A 64-bit integer vector. 10960b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the bitwise AND of the second 10970b57cec5SDimitry Andric /// parameter and the one's complement of the first parameter. 10980b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 10990b57cec5SDimitry Andric _mm_andnot_si64(__m64 __m1, __m64 __m2) 11000b57cec5SDimitry Andric { 11010b57cec5SDimitry Andric return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); 11020b57cec5SDimitry Andric } 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric /// Performs a bitwise OR of two 64-bit integer vectors. 11050b57cec5SDimitry Andric /// 11060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11070b57cec5SDimitry Andric /// 11080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> POR </c> instruction. 
11090b57cec5SDimitry Andric /// 11100b57cec5SDimitry Andric /// \param __m1 11110b57cec5SDimitry Andric /// A 64-bit integer vector. 11120b57cec5SDimitry Andric /// \param __m2 11130b57cec5SDimitry Andric /// A 64-bit integer vector. 11140b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the bitwise OR of both 11150b57cec5SDimitry Andric /// parameters. 11160b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 11170b57cec5SDimitry Andric _mm_or_si64(__m64 __m1, __m64 __m2) 11180b57cec5SDimitry Andric { 11190b57cec5SDimitry Andric return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); 11200b57cec5SDimitry Andric } 11210b57cec5SDimitry Andric 11220b57cec5SDimitry Andric /// Performs a bitwise exclusive OR of two 64-bit integer vectors. 11230b57cec5SDimitry Andric /// 11240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11250b57cec5SDimitry Andric /// 11260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PXOR </c> instruction. 11270b57cec5SDimitry Andric /// 11280b57cec5SDimitry Andric /// \param __m1 11290b57cec5SDimitry Andric /// A 64-bit integer vector. 11300b57cec5SDimitry Andric /// \param __m2 11310b57cec5SDimitry Andric /// A 64-bit integer vector. 11320b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 11330b57cec5SDimitry Andric /// parameters. 11340b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 11350b57cec5SDimitry Andric _mm_xor_si64(__m64 __m1, __m64 __m2) 11360b57cec5SDimitry Andric { 11370b57cec5SDimitry Andric return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); 11380b57cec5SDimitry Andric } 11390b57cec5SDimitry Andric 11400b57cec5SDimitry Andric /// Compares the 8-bit integer elements of two 64-bit integer vectors of 11410b57cec5SDimitry Andric /// [8 x i8] to determine if the element of the first vector is equal to the 11420b57cec5SDimitry Andric /// corresponding element of the second vector. 
11430b57cec5SDimitry Andric /// 1144*0fca6ea1SDimitry Andric /// Each comparison returns 0 for false, 0xFF for true. 11450b57cec5SDimitry Andric /// 11460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11470b57cec5SDimitry Andric /// 11480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. 11490b57cec5SDimitry Andric /// 11500b57cec5SDimitry Andric /// \param __m1 11510b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 11520b57cec5SDimitry Andric /// \param __m2 11530b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 11540b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 11550b57cec5SDimitry Andric /// results. 11560b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 11570b57cec5SDimitry Andric _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 11580b57cec5SDimitry Andric { 11590b57cec5SDimitry Andric return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 11600b57cec5SDimitry Andric } 11610b57cec5SDimitry Andric 11620b57cec5SDimitry Andric /// Compares the 16-bit integer elements of two 64-bit integer vectors of 11630b57cec5SDimitry Andric /// [4 x i16] to determine if the element of the first vector is equal to the 11640b57cec5SDimitry Andric /// corresponding element of the second vector. 11650b57cec5SDimitry Andric /// 1166*0fca6ea1SDimitry Andric /// Each comparison returns 0 for false, 0xFFFF for true. 11670b57cec5SDimitry Andric /// 11680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11690b57cec5SDimitry Andric /// 11700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. 11710b57cec5SDimitry Andric /// 11720b57cec5SDimitry Andric /// \param __m1 11730b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 11740b57cec5SDimitry Andric /// \param __m2 11750b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 
11760b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 11770b57cec5SDimitry Andric /// results. 11780b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 11790b57cec5SDimitry Andric _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 11800b57cec5SDimitry Andric { 11810b57cec5SDimitry Andric return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 11820b57cec5SDimitry Andric } 11830b57cec5SDimitry Andric 11840b57cec5SDimitry Andric /// Compares the 32-bit integer elements of two 64-bit integer vectors of 11850b57cec5SDimitry Andric /// [2 x i32] to determine if the element of the first vector is equal to the 11860b57cec5SDimitry Andric /// corresponding element of the second vector. 11870b57cec5SDimitry Andric /// 1188*0fca6ea1SDimitry Andric /// Each comparison returns 0 for false, 0xFFFFFFFF for true. 11890b57cec5SDimitry Andric /// 11900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11910b57cec5SDimitry Andric /// 11920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. 11930b57cec5SDimitry Andric /// 11940b57cec5SDimitry Andric /// \param __m1 11950b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 11960b57cec5SDimitry Andric /// \param __m2 11970b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 11980b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 11990b57cec5SDimitry Andric /// results. 
12000b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 12010b57cec5SDimitry Andric _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 12020b57cec5SDimitry Andric { 12030b57cec5SDimitry Andric return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 12040b57cec5SDimitry Andric } 12050b57cec5SDimitry Andric 12060b57cec5SDimitry Andric /// Compares the 8-bit integer elements of two 64-bit integer vectors of 12070b57cec5SDimitry Andric /// [8 x i8] to determine if the element of the first vector is greater than 12080b57cec5SDimitry Andric /// the corresponding element of the second vector. 12090b57cec5SDimitry Andric /// 1210*0fca6ea1SDimitry Andric /// Each comparison returns 0 for false, 0xFF for true. 12110b57cec5SDimitry Andric /// 12120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12130b57cec5SDimitry Andric /// 12140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. 12150b57cec5SDimitry Andric /// 12160b57cec5SDimitry Andric /// \param __m1 12170b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 12180b57cec5SDimitry Andric /// \param __m2 12190b57cec5SDimitry Andric /// A 64-bit integer vector of [8 x i8]. 12200b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 12210b57cec5SDimitry Andric /// results. 12220b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 12230b57cec5SDimitry Andric _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 12240b57cec5SDimitry Andric { 12250b57cec5SDimitry Andric return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 12260b57cec5SDimitry Andric } 12270b57cec5SDimitry Andric 12280b57cec5SDimitry Andric /// Compares the 16-bit integer elements of two 64-bit integer vectors of 12290b57cec5SDimitry Andric /// [4 x i16] to determine if the element of the first vector is greater than 12300b57cec5SDimitry Andric /// the corresponding element of the second vector. 
12310b57cec5SDimitry Andric /// 1232*0fca6ea1SDimitry Andric /// Each comparison returns 0 for false, 0xFFFF for true. 12330b57cec5SDimitry Andric /// 12340b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12350b57cec5SDimitry Andric /// 12360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. 12370b57cec5SDimitry Andric /// 12380b57cec5SDimitry Andric /// \param __m1 12390b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 12400b57cec5SDimitry Andric /// \param __m2 12410b57cec5SDimitry Andric /// A 64-bit integer vector of [4 x i16]. 12420b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 12430b57cec5SDimitry Andric /// results. 12440b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 12450b57cec5SDimitry Andric _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 12460b57cec5SDimitry Andric { 12470b57cec5SDimitry Andric return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 12480b57cec5SDimitry Andric } 12490b57cec5SDimitry Andric 12500b57cec5SDimitry Andric /// Compares the 32-bit integer elements of two 64-bit integer vectors of 12510b57cec5SDimitry Andric /// [2 x i32] to determine if the element of the first vector is greater than 12520b57cec5SDimitry Andric /// the corresponding element of the second vector. 12530b57cec5SDimitry Andric /// 1254*0fca6ea1SDimitry Andric /// Each comparison returns 0 for false, 0xFFFFFFFF for true. 12550b57cec5SDimitry Andric /// 12560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12570b57cec5SDimitry Andric /// 12580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. 12590b57cec5SDimitry Andric /// 12600b57cec5SDimitry Andric /// \param __m1 12610b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 12620b57cec5SDimitry Andric /// \param __m2 12630b57cec5SDimitry Andric /// A 64-bit integer vector of [2 x i32]. 
12640b57cec5SDimitry Andric /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 12650b57cec5SDimitry Andric /// results. 12660b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 12670b57cec5SDimitry Andric _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 12680b57cec5SDimitry Andric { 12690b57cec5SDimitry Andric return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 12700b57cec5SDimitry Andric } 12710b57cec5SDimitry Andric 12720b57cec5SDimitry Andric /// Constructs a 64-bit integer vector initialized to zero. 12730b57cec5SDimitry Andric /// 12740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12750b57cec5SDimitry Andric /// 12760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PXOR </c> instruction. 12770b57cec5SDimitry Andric /// 12780b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector with all elements set to zero. 12790b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 12800b57cec5SDimitry Andric _mm_setzero_si64(void) 12810b57cec5SDimitry Andric { 12820b57cec5SDimitry Andric return __extension__ (__m64){ 0LL }; 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric /// Constructs a 64-bit integer vector initialized with the specified 12860b57cec5SDimitry Andric /// 32-bit integer values. 12870b57cec5SDimitry Andric /// 12880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12890b57cec5SDimitry Andric /// 12900b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 12910b57cec5SDimitry Andric /// instruction. 12920b57cec5SDimitry Andric /// 12930b57cec5SDimitry Andric /// \param __i1 12940b57cec5SDimitry Andric /// A 32-bit integer value used to initialize the upper 32 bits of the 12950b57cec5SDimitry Andric /// result. 
12960b57cec5SDimitry Andric /// \param __i0 12970b57cec5SDimitry Andric /// A 32-bit integer value used to initialize the lower 32 bits of the 12980b57cec5SDimitry Andric /// result. 12990b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector. 13000b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 13010b57cec5SDimitry Andric _mm_set_pi32(int __i1, int __i0) 13020b57cec5SDimitry Andric { 13030b57cec5SDimitry Andric return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 13040b57cec5SDimitry Andric } 13050b57cec5SDimitry Andric 13060b57cec5SDimitry Andric /// Constructs a 64-bit integer vector initialized with the specified 13070b57cec5SDimitry Andric /// 16-bit integer values. 13080b57cec5SDimitry Andric /// 13090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13100b57cec5SDimitry Andric /// 13110b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 13120b57cec5SDimitry Andric /// instruction. 13130b57cec5SDimitry Andric /// 13140b57cec5SDimitry Andric /// \param __s3 13150b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [63:48] of the result. 13160b57cec5SDimitry Andric /// \param __s2 13170b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [47:32] of the result. 13180b57cec5SDimitry Andric /// \param __s1 13190b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [31:16] of the result. 13200b57cec5SDimitry Andric /// \param __s0 13210b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [15:0] of the result. 13220b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector. 
13230b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 13240b57cec5SDimitry Andric _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 13250b57cec5SDimitry Andric { 13260b57cec5SDimitry Andric return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 13270b57cec5SDimitry Andric } 13280b57cec5SDimitry Andric 13290b57cec5SDimitry Andric /// Constructs a 64-bit integer vector initialized with the specified 13300b57cec5SDimitry Andric /// 8-bit integer values. 13310b57cec5SDimitry Andric /// 13320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13330b57cec5SDimitry Andric /// 13340b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 13350b57cec5SDimitry Andric /// instruction. 13360b57cec5SDimitry Andric /// 13370b57cec5SDimitry Andric /// \param __b7 13380b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [63:56] of the result. 13390b57cec5SDimitry Andric /// \param __b6 13400b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [55:48] of the result. 13410b57cec5SDimitry Andric /// \param __b5 13420b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [47:40] of the result. 13430b57cec5SDimitry Andric /// \param __b4 13440b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [39:32] of the result. 13450b57cec5SDimitry Andric /// \param __b3 13460b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [31:24] of the result. 13470b57cec5SDimitry Andric /// \param __b2 13480b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [23:16] of the result. 13490b57cec5SDimitry Andric /// \param __b1 13500b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [15:8] of the result. 13510b57cec5SDimitry Andric /// \param __b0 13520b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [7:0] of the result. 
13530b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector. 13540b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 13550b57cec5SDimitry Andric _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 13560b57cec5SDimitry Andric char __b1, char __b0) 13570b57cec5SDimitry Andric { 13580b57cec5SDimitry Andric return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 13590b57cec5SDimitry Andric __b4, __b5, __b6, __b7); 13600b57cec5SDimitry Andric } 13610b57cec5SDimitry Andric 13620b57cec5SDimitry Andric /// Constructs a 64-bit integer vector of [2 x i32], with each of the 13630b57cec5SDimitry Andric /// 32-bit integer vector elements set to the specified 32-bit integer 13640b57cec5SDimitry Andric /// value. 13650b57cec5SDimitry Andric /// 13660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13670b57cec5SDimitry Andric /// 13680b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 13690b57cec5SDimitry Andric /// instruction. 13700b57cec5SDimitry Andric /// 13710b57cec5SDimitry Andric /// \param __i 13720b57cec5SDimitry Andric /// A 32-bit integer value used to initialize each vector element of the 13730b57cec5SDimitry Andric /// result. 13740b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector of [2 x i32]. 13750b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 13760b57cec5SDimitry Andric _mm_set1_pi32(int __i) 13770b57cec5SDimitry Andric { 13780b57cec5SDimitry Andric return _mm_set_pi32(__i, __i); 13790b57cec5SDimitry Andric } 13800b57cec5SDimitry Andric 13810b57cec5SDimitry Andric /// Constructs a 64-bit integer vector of [4 x i16], with each of the 13820b57cec5SDimitry Andric /// 16-bit integer vector elements set to the specified 16-bit integer 13830b57cec5SDimitry Andric /// value. 
13840b57cec5SDimitry Andric /// 13850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13860b57cec5SDimitry Andric /// 13870b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 13880b57cec5SDimitry Andric /// instruction. 13890b57cec5SDimitry Andric /// 13900b57cec5SDimitry Andric /// \param __w 13910b57cec5SDimitry Andric /// A 16-bit integer value used to initialize each vector element of the 13920b57cec5SDimitry Andric /// result. 13930b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector of [4 x i16]. 13940b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 13950b57cec5SDimitry Andric _mm_set1_pi16(short __w) 13960b57cec5SDimitry Andric { 13970b57cec5SDimitry Andric return _mm_set_pi16(__w, __w, __w, __w); 13980b57cec5SDimitry Andric } 13990b57cec5SDimitry Andric 14000b57cec5SDimitry Andric /// Constructs a 64-bit integer vector of [8 x i8], with each of the 14010b57cec5SDimitry Andric /// 8-bit integer vector elements set to the specified 8-bit integer value. 14020b57cec5SDimitry Andric /// 14030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14040b57cec5SDimitry Andric /// 14050b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 14060b57cec5SDimitry Andric /// instruction. 14070b57cec5SDimitry Andric /// 14080b57cec5SDimitry Andric /// \param __b 14090b57cec5SDimitry Andric /// An 8-bit integer value used to initialize each vector element of the 14100b57cec5SDimitry Andric /// result. 14110b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector of [8 x i8]. 
14120b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 14130b57cec5SDimitry Andric _mm_set1_pi8(char __b) 14140b57cec5SDimitry Andric { 14150b57cec5SDimitry Andric return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 14160b57cec5SDimitry Andric } 14170b57cec5SDimitry Andric 14180b57cec5SDimitry Andric /// Constructs a 64-bit integer vector, initialized in reverse order with 14190b57cec5SDimitry Andric /// the specified 32-bit integer values. 14200b57cec5SDimitry Andric /// 14210b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14220b57cec5SDimitry Andric /// 14230b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 14240b57cec5SDimitry Andric /// instruction. 14250b57cec5SDimitry Andric /// 14260b57cec5SDimitry Andric /// \param __i0 14270b57cec5SDimitry Andric /// A 32-bit integer value used to initialize the lower 32 bits of the 14280b57cec5SDimitry Andric /// result. 14290b57cec5SDimitry Andric /// \param __i1 14300b57cec5SDimitry Andric /// A 32-bit integer value used to initialize the upper 32 bits of the 14310b57cec5SDimitry Andric /// result. 14320b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector. 14330b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 14340b57cec5SDimitry Andric _mm_setr_pi32(int __i0, int __i1) 14350b57cec5SDimitry Andric { 14360b57cec5SDimitry Andric return _mm_set_pi32(__i1, __i0); 14370b57cec5SDimitry Andric } 14380b57cec5SDimitry Andric 14390b57cec5SDimitry Andric /// Constructs a 64-bit integer vector, initialized in reverse order with 14400b57cec5SDimitry Andric /// the specified 16-bit integer values. 14410b57cec5SDimitry Andric /// 14420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14430b57cec5SDimitry Andric /// 14440b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 14450b57cec5SDimitry Andric /// instruction. 
14460b57cec5SDimitry Andric /// 14470b57cec5SDimitry Andric /// \param __w0 14480b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [15:0] of the result. 14490b57cec5SDimitry Andric /// \param __w1 14500b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [31:16] of the result. 14510b57cec5SDimitry Andric /// \param __w2 14520b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [47:32] of the result. 14530b57cec5SDimitry Andric /// \param __w3 14540b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [63:48] of the result. 14550b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector. 14560b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 14570b57cec5SDimitry Andric _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 14580b57cec5SDimitry Andric { 14590b57cec5SDimitry Andric return _mm_set_pi16(__w3, __w2, __w1, __w0); 14600b57cec5SDimitry Andric } 14610b57cec5SDimitry Andric 14620b57cec5SDimitry Andric /// Constructs a 64-bit integer vector, initialized in reverse order with 14630b57cec5SDimitry Andric /// the specified 8-bit integer values. 14640b57cec5SDimitry Andric /// 14650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14660b57cec5SDimitry Andric /// 14670b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 14680b57cec5SDimitry Andric /// instruction. 14690b57cec5SDimitry Andric /// 14700b57cec5SDimitry Andric /// \param __b0 14710b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [7:0] of the result. 14720b57cec5SDimitry Andric /// \param __b1 14730b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [15:8] of the result. 14740b57cec5SDimitry Andric /// \param __b2 14750b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [23:16] of the result. 
14760b57cec5SDimitry Andric /// \param __b3 14770b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [31:24] of the result. 14780b57cec5SDimitry Andric /// \param __b4 14790b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [39:32] of the result. 14800b57cec5SDimitry Andric /// \param __b5 14810b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [47:40] of the result. 14820b57cec5SDimitry Andric /// \param __b6 14830b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [55:48] of the result. 14840b57cec5SDimitry Andric /// \param __b7 14850b57cec5SDimitry Andric /// An 8-bit integer value used to initialize bits [63:56] of the result. 14860b57cec5SDimitry Andric /// \returns An initialized 64-bit integer vector. 14870b57cec5SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS 14880b57cec5SDimitry Andric _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 14890b57cec5SDimitry Andric char __b6, char __b7) 14900b57cec5SDimitry Andric { 14910b57cec5SDimitry Andric return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 14920b57cec5SDimitry Andric } 14930b57cec5SDimitry Andric 14940b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 14950b57cec5SDimitry Andric 14960b57cec5SDimitry Andric /* Aliases for compatibility. 
*/
#define _m_empty _mm_empty
#define _m_from_int _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int _mm_cvtsi64_si32
#define _m_to_int64 _mm_cvtm64_si64
#define _m_packsswb _mm_packs_pi16
#define _m_packssdw _mm_packs_pi32
#define _m_packuswb _mm_packs_pu16
#define _m_punpckhbw _mm_unpackhi_pi8
#define _m_punpckhwd _mm_unpackhi_pi16
#define _m_punpckhdq _mm_unpackhi_pi32
#define _m_punpcklbw _mm_unpacklo_pi8
#define _m_punpcklwd _mm_unpacklo_pi16
#define _m_punpckldq _mm_unpacklo_pi32
#define _m_paddb _mm_add_pi8
#define _m_paddw _mm_add_pi16
#define _m_paddd _mm_add_pi32
#define _m_paddsb _mm_adds_pi8
#define _m_paddsw _mm_adds_pi16
#define _m_paddusb _mm_adds_pu8
#define _m_paddusw _mm_adds_pu16
#define _m_psubb _mm_sub_pi8
#define _m_psubw _mm_sub_pi16
#define _m_psubd _mm_sub_pi32
#define _m_psubsb _mm_subs_pi8
#define _m_psubsw _mm_subs_pi16
#define _m_psubusb _mm_subs_pu8
#define _m_psubusw _mm_subs_pu16
#define _m_pmaddwd _mm_madd_pi16
#define _m_pmulhw _mm_mulhi_pi16
#define _m_pmullw _mm_mullo_pi16
#define _m_psllw _mm_sll_pi16
#define _m_psllwi _mm_slli_pi16 15300b57cec5SDimitry Andric #define _m_pslld _mm_sll_pi32 15310b57cec5SDimitry Andric #define _m_pslldi _mm_slli_pi32 15320b57cec5SDimitry Andric #define _m_psllq _mm_sll_si64 15330b57cec5SDimitry Andric #define _m_psllqi _mm_slli_si64 15340b57cec5SDimitry Andric #define _m_psraw _mm_sra_pi16 15350b57cec5SDimitry Andric #define _m_psrawi _mm_srai_pi16 15360b57cec5SDimitry Andric #define _m_psrad _mm_sra_pi32 15370b57cec5SDimitry Andric #define _m_psradi _mm_srai_pi32 15380b57cec5SDimitry Andric #define _m_psrlw _mm_srl_pi16 15390b57cec5SDimitry Andric #define _m_psrlwi _mm_srli_pi16 15400b57cec5SDimitry Andric #define _m_psrld _mm_srl_pi32 15410b57cec5SDimitry Andric #define _m_psrldi _mm_srli_pi32 15420b57cec5SDimitry Andric #define _m_psrlq _mm_srl_si64 15430b57cec5SDimitry Andric #define _m_psrlqi _mm_srli_si64 15440b57cec5SDimitry Andric #define _m_pand _mm_and_si64 15450b57cec5SDimitry Andric #define _m_pandn _mm_andnot_si64 15460b57cec5SDimitry Andric #define _m_por _mm_or_si64 15470b57cec5SDimitry Andric #define _m_pxor _mm_xor_si64 15480b57cec5SDimitry Andric #define _m_pcmpeqb _mm_cmpeq_pi8 15490b57cec5SDimitry Andric #define _m_pcmpeqw _mm_cmpeq_pi16 15500b57cec5SDimitry Andric #define _m_pcmpeqd _mm_cmpeq_pi32 15510b57cec5SDimitry Andric #define _m_pcmpgtb _mm_cmpgt_pi8 15520b57cec5SDimitry Andric #define _m_pcmpgtw _mm_cmpgt_pi16 15530b57cec5SDimitry Andric #define _m_pcmpgtd _mm_cmpgt_pi32 15540b57cec5SDimitry Andric 15550b57cec5SDimitry Andric #endif /* __MMINTRIN_H */ 15560b57cec5SDimitry Andric 1557