10b57cec5SDimitry Andric /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __EMMINTRIN_H 110b57cec5SDimitry Andric #define __EMMINTRIN_H 120b57cec5SDimitry Andric 13349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__) 14349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture" 15349cc55cSDimitry Andric #endif 16349cc55cSDimitry Andric 170b57cec5SDimitry Andric #include <xmmintrin.h> 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); 200b57cec5SDimitry Andric typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); 2381ad6265SDimitry Andric typedef long long __m128i_u 2481ad6265SDimitry Andric __attribute__((__vector_size__(16), __aligned__(1))); 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric /* Type defines. 
*/ 270b57cec5SDimitry Andric typedef double __v2df __attribute__((__vector_size__(16))); 280b57cec5SDimitry Andric typedef long long __v2di __attribute__((__vector_size__(16))); 290b57cec5SDimitry Andric typedef short __v8hi __attribute__((__vector_size__(16))); 300b57cec5SDimitry Andric typedef char __v16qi __attribute__((__vector_size__(16))); 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric /* Unsigned types */ 330b57cec5SDimitry Andric typedef unsigned long long __v2du __attribute__((__vector_size__(16))); 340b57cec5SDimitry Andric typedef unsigned short __v8hu __attribute__((__vector_size__(16))); 350b57cec5SDimitry Andric typedef unsigned char __v16qu __attribute__((__vector_size__(16))); 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric /* We need an explicitly signed variant for char. Note that this shouldn't 380b57cec5SDimitry Andric * appear in the interface though. */ 390b57cec5SDimitry Andric typedef signed char __v16qs __attribute__((__vector_size__(16))); 400b57cec5SDimitry Andric 41bdd1243dSDimitry Andric #ifdef __SSE2__ 42bdd1243dSDimitry Andric /* Both _Float16 and __bf16 require SSE2 being enabled. */ 43bdd1243dSDimitry Andric typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16))); 44bdd1243dSDimitry Andric typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16))); 45bdd1243dSDimitry Andric typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1))); 46bdd1243dSDimitry Andric 47bdd1243dSDimitry Andric typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16))); 48bdd1243dSDimitry Andric typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); 49bdd1243dSDimitry Andric #endif 50bdd1243dSDimitry Andric 510b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. 
*/ 5281ad6265SDimitry Andric #define __DEFAULT_FN_ATTRS \ 535f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 545f757f3fSDimitry Andric __target__("sse2,no-evex512"), __min_vector_width__(128))) 5581ad6265SDimitry Andric #define __DEFAULT_FN_ATTRS_MMX \ 565f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 575f757f3fSDimitry Andric __target__("mmx,sse2,no-evex512"), __min_vector_width__(64))) 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric /// Adds lower double-precision values in both operands and returns the 600b57cec5SDimitry Andric /// sum in the lower 64 bits of the result. The upper 64 bits of the result 610b57cec5SDimitry Andric /// are copied from the upper double-precision value of the first operand. 620b57cec5SDimitry Andric /// 630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 640b57cec5SDimitry Andric /// 650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction. 660b57cec5SDimitry Andric /// 670b57cec5SDimitry Andric /// \param __a 680b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 690b57cec5SDimitry Andric /// \param __b 700b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 710b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 720b57cec5SDimitry Andric /// sum of the lower 64 bits of both operands. The upper 64 bits are copied 730b57cec5SDimitry Andric /// from the upper 64 bits of the first source operand. 7481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, 7581ad6265SDimitry Andric __m128d __b) { 760b57cec5SDimitry Andric __a[0] += __b[0]; 770b57cec5SDimitry Andric return __a; 780b57cec5SDimitry Andric } 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric /// Adds two 128-bit vectors of [2 x double]. 
810b57cec5SDimitry Andric /// 820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 830b57cec5SDimitry Andric /// 840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction. 850b57cec5SDimitry Andric /// 860b57cec5SDimitry Andric /// \param __a 870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 880b57cec5SDimitry Andric /// \param __b 890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 900b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the sums of both 910b57cec5SDimitry Andric /// operands. 9281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, 9381ad6265SDimitry Andric __m128d __b) { 940b57cec5SDimitry Andric return (__m128d)((__v2df)__a + (__v2df)__b); 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric /// Subtracts the lower double-precision value of the second operand 980b57cec5SDimitry Andric /// from the lower double-precision value of the first operand and returns 990b57cec5SDimitry Andric /// the difference in the lower 64 bits of the result. The upper 64 bits of 1000b57cec5SDimitry Andric /// the result are copied from the upper double-precision value of the first 1010b57cec5SDimitry Andric /// operand. 1020b57cec5SDimitry Andric /// 1030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1040b57cec5SDimitry Andric /// 1050b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction. 1060b57cec5SDimitry Andric /// 1070b57cec5SDimitry Andric /// \param __a 1080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 1090b57cec5SDimitry Andric /// \param __b 1100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 
1110b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1120b57cec5SDimitry Andric /// difference of the lower 64 bits of both operands. The upper 64 bits are 1130b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 11481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, 11581ad6265SDimitry Andric __m128d __b) { 1160b57cec5SDimitry Andric __a[0] -= __b[0]; 1170b57cec5SDimitry Andric return __a; 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric /// Subtracts two 128-bit vectors of [2 x double]. 1210b57cec5SDimitry Andric /// 1220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1230b57cec5SDimitry Andric /// 1240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction. 1250b57cec5SDimitry Andric /// 1260b57cec5SDimitry Andric /// \param __a 1270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the minuend. 1280b57cec5SDimitry Andric /// \param __b 1290b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 1300b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the differences between 1310b57cec5SDimitry Andric /// both operands. 13281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, 13381ad6265SDimitry Andric __m128d __b) { 1340b57cec5SDimitry Andric return (__m128d)((__v2df)__a - (__v2df)__b); 1350b57cec5SDimitry Andric } 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric /// Multiplies lower double-precision values in both operands and returns 1380b57cec5SDimitry Andric /// the product in the lower 64 bits of the result. The upper 64 bits of the 1390b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1400b57cec5SDimitry Andric /// operand. 
1410b57cec5SDimitry Andric /// 1420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1430b57cec5SDimitry Andric /// 1440b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction. 1450b57cec5SDimitry Andric /// 1460b57cec5SDimitry Andric /// \param __a 1470b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 1480b57cec5SDimitry Andric /// \param __b 1490b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 1500b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1510b57cec5SDimitry Andric /// product of the lower 64 bits of both operands. The upper 64 bits are 1520b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 15381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, 15481ad6265SDimitry Andric __m128d __b) { 1550b57cec5SDimitry Andric __a[0] *= __b[0]; 1560b57cec5SDimitry Andric return __a; 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric /// Multiplies two 128-bit vectors of [2 x double]. 1600b57cec5SDimitry Andric /// 1610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1620b57cec5SDimitry Andric /// 1630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction. 1640b57cec5SDimitry Andric /// 1650b57cec5SDimitry Andric /// \param __a 1660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1670b57cec5SDimitry Andric /// \param __b 1680b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 1690b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the products of both 1700b57cec5SDimitry Andric /// operands. 
17181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, 17281ad6265SDimitry Andric __m128d __b) { 1730b57cec5SDimitry Andric return (__m128d)((__v2df)__a * (__v2df)__b); 1740b57cec5SDimitry Andric } 1750b57cec5SDimitry Andric 1760b57cec5SDimitry Andric /// Divides the lower double-precision value of the first operand by the 1770b57cec5SDimitry Andric /// lower double-precision value of the second operand and returns the 1780b57cec5SDimitry Andric /// quotient in the lower 64 bits of the result. The upper 64 bits of the 1790b57cec5SDimitry Andric /// result are copied from the upper double-precision value of the first 1800b57cec5SDimitry Andric /// operand. 1810b57cec5SDimitry Andric /// 1820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1830b57cec5SDimitry Andric /// 1840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction. 1850b57cec5SDimitry Andric /// 1860b57cec5SDimitry Andric /// \param __a 1870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 1880b57cec5SDimitry Andric /// \param __b 1890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing divisor. 1900b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 1910b57cec5SDimitry Andric /// quotient of the lower 64 bits of both operands. The upper 64 bits are 1920b57cec5SDimitry Andric /// copied from the upper 64 bits of the first source operand. 19381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, 19481ad6265SDimitry Andric __m128d __b) { 1950b57cec5SDimitry Andric __a[0] /= __b[0]; 1960b57cec5SDimitry Andric return __a; 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric /// Performs an element-by-element division of two 128-bit vectors of 2000b57cec5SDimitry Andric /// [2 x double]. 
2010b57cec5SDimitry Andric /// 2020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2030b57cec5SDimitry Andric /// 2040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction. 2050b57cec5SDimitry Andric /// 2060b57cec5SDimitry Andric /// \param __a 2070b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the dividend. 2080b57cec5SDimitry Andric /// \param __b 2090b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the divisor. 2100b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the quotients of both 2110b57cec5SDimitry Andric /// operands. 21281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, 21381ad6265SDimitry Andric __m128d __b) { 2140b57cec5SDimitry Andric return (__m128d)((__v2df)__a / (__v2df)__b); 2150b57cec5SDimitry Andric } 2160b57cec5SDimitry Andric 2170b57cec5SDimitry Andric /// Calculates the square root of the lower double-precision value of 2180b57cec5SDimitry Andric /// the second operand and returns it in the lower 64 bits of the result. 2190b57cec5SDimitry Andric /// The upper 64 bits of the result are copied from the upper 2200b57cec5SDimitry Andric /// double-precision value of the first operand. 2210b57cec5SDimitry Andric /// 2220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2230b57cec5SDimitry Andric /// 2240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction. 2250b57cec5SDimitry Andric /// 2260b57cec5SDimitry Andric /// \param __a 2270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2280b57cec5SDimitry Andric /// upper 64 bits of this operand are copied to the upper 64 bits of the 2290b57cec5SDimitry Andric /// result. 2300b57cec5SDimitry Andric /// \param __b 2310b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 
The 2320b57cec5SDimitry Andric /// square root is calculated using the lower 64 bits of this operand. 2330b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2340b57cec5SDimitry Andric /// square root of the lower 64 bits of operand \a __b, and whose upper 64 2350b57cec5SDimitry Andric /// bits are copied from the upper 64 bits of operand \a __a. 23681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, 23781ad6265SDimitry Andric __m128d __b) { 2380b57cec5SDimitry Andric __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b); 2390b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric /// Calculates the square root of the each of two values stored in a 2430b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 2440b57cec5SDimitry Andric /// 2450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2460b57cec5SDimitry Andric /// 2470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction. 2480b57cec5SDimitry Andric /// 2490b57cec5SDimitry Andric /// \param __a 2500b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 2510b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the square roots of the 2520b57cec5SDimitry Andric /// values in the operand. 25381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { 2540b57cec5SDimitry Andric return __builtin_ia32_sqrtpd((__v2df)__a); 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 2580b57cec5SDimitry Andric /// returns the lesser of the pair of values in the lower 64-bits of the 2590b57cec5SDimitry Andric /// result. 
The upper 64 bits of the result are copied from the upper 2600b57cec5SDimitry Andric /// double-precision value of the first operand. 2610b57cec5SDimitry Andric /// 2620fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns the value from \a __b. 2630fca6ea1SDimitry Andric /// 2640b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2650b57cec5SDimitry Andric /// 2660b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction. 2670b57cec5SDimitry Andric /// 2680b57cec5SDimitry Andric /// \param __a 2690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2700b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2710b57cec5SDimitry Andric /// \param __b 2720b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 2730b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 2740b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 2750b57cec5SDimitry Andric /// minimum value between both operands. The upper 64 bits are copied from 2760b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 27781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, 27881ad6265SDimitry Andric __m128d __b) { 2790b57cec5SDimitry Andric return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); 2800b57cec5SDimitry Andric } 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 2830fca6ea1SDimitry Andric /// [2 x double] and returns a vector containing the lesser of each pair of 2840b57cec5SDimitry Andric /// values. 2850b57cec5SDimitry Andric /// 2860fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns the value from \a __b. 
2870fca6ea1SDimitry Andric /// 2880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2890b57cec5SDimitry Andric /// 2900b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction. 2910b57cec5SDimitry Andric /// 2920b57cec5SDimitry Andric /// \param __a 2930b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 2940b57cec5SDimitry Andric /// \param __b 2950b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 2960b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the minimum values 2970b57cec5SDimitry Andric /// between both operands. 29881ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, 29981ad6265SDimitry Andric __m128d __b) { 3000b57cec5SDimitry Andric return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric /// Compares lower 64-bit double-precision values of both operands, and 3040b57cec5SDimitry Andric /// returns the greater of the pair of values in the lower 64-bits of the 3050b57cec5SDimitry Andric /// result. The upper 64 bits of the result are copied from the upper 3060b57cec5SDimitry Andric /// double-precision value of the first operand. 3070b57cec5SDimitry Andric /// 3080fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns the value from \a __b. 3090fca6ea1SDimitry Andric /// 3100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3110b57cec5SDimitry Andric /// 3120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction. 3130b57cec5SDimitry Andric /// 3140b57cec5SDimitry Andric /// \param __a 3150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3160b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 
3170b57cec5SDimitry Andric /// \param __b 3180b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. The 3190b57cec5SDimitry Andric /// lower 64 bits of this operand are used in the comparison. 3200b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 3210b57cec5SDimitry Andric /// maximum value between both operands. The upper 64 bits are copied from 3220b57cec5SDimitry Andric /// the upper 64 bits of the first source operand. 32381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, 32481ad6265SDimitry Andric __m128d __b) { 3250b57cec5SDimitry Andric return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); 3260b57cec5SDimitry Andric } 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric /// Performs element-by-element comparison of the two 128-bit vectors of 3290fca6ea1SDimitry Andric /// [2 x double] and returns a vector containing the greater of each pair 3300b57cec5SDimitry Andric /// of values. 3310b57cec5SDimitry Andric /// 3320fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns the value from \a __b. 3330fca6ea1SDimitry Andric /// 3340b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3350b57cec5SDimitry Andric /// 3360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction. 3370b57cec5SDimitry Andric /// 3380b57cec5SDimitry Andric /// \param __a 3390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3400b57cec5SDimitry Andric /// \param __b 3410b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the operands. 3420b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the maximum values 3430b57cec5SDimitry Andric /// between both operands. 
34481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, 34581ad6265SDimitry Andric __m128d __b) { 3460b57cec5SDimitry Andric return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double]. 3500b57cec5SDimitry Andric /// 3510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3520b57cec5SDimitry Andric /// 3530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 3540b57cec5SDimitry Andric /// 3550b57cec5SDimitry Andric /// \param __a 3560b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3570b57cec5SDimitry Andric /// \param __b 3580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3590b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3600b57cec5SDimitry Andric /// values between both operands. 36181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, 36281ad6265SDimitry Andric __m128d __b) { 3630b57cec5SDimitry Andric return (__m128d)((__v2du)__a & (__v2du)__b); 3640b57cec5SDimitry Andric } 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit vectors of [2 x double], using 3670b57cec5SDimitry Andric /// the one's complement of the values contained in the first source operand. 3680b57cec5SDimitry Andric /// 3690b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3700b57cec5SDimitry Andric /// 3710b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 3720b57cec5SDimitry Andric /// 3730b57cec5SDimitry Andric /// \param __a 3740b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the left source operand. 
The 3750b57cec5SDimitry Andric /// one's complement of this value is used in the bitwise AND. 3760b57cec5SDimitry Andric /// \param __b 3770b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the right source operand. 3780b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the 3790b57cec5SDimitry Andric /// values in the second operand and the one's complement of the first 3800b57cec5SDimitry Andric /// operand. 38181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, 38281ad6265SDimitry Andric __m128d __b) { 3830b57cec5SDimitry Andric return (__m128d)(~(__v2du)__a & (__v2du)__b); 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit vectors of [2 x double]. 3870b57cec5SDimitry Andric /// 3880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 3890b57cec5SDimitry Andric /// 3900b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 3910b57cec5SDimitry Andric /// 3920b57cec5SDimitry Andric /// \param __a 3930b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3940b57cec5SDimitry Andric /// \param __b 3950b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 3960b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the 3970b57cec5SDimitry Andric /// values between both operands. 39881ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, 39981ad6265SDimitry Andric __m128d __b) { 4000b57cec5SDimitry Andric return (__m128d)((__v2du)__a | (__v2du)__b); 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric /// Performs a bitwise XOR of two 128-bit vectors of [2 x double]. 
4040b57cec5SDimitry Andric /// 4050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4060b57cec5SDimitry Andric /// 4070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 4080b57cec5SDimitry Andric /// 4090b57cec5SDimitry Andric /// \param __a 4100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 4110b57cec5SDimitry Andric /// \param __b 4120b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 4130b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the 4140b57cec5SDimitry Andric /// values between both operands. 41581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, 41681ad6265SDimitry Andric __m128d __b) { 4170b57cec5SDimitry Andric return (__m128d)((__v2du)__a ^ (__v2du)__b); 4180b57cec5SDimitry Andric } 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4210fca6ea1SDimitry Andric /// 128-bit vectors of [2 x double] for equality. 4220fca6ea1SDimitry Andric /// 4230fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4240fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 4250b57cec5SDimitry Andric /// 4260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4270b57cec5SDimitry Andric /// 4280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction. 4290b57cec5SDimitry Andric /// 4300b57cec5SDimitry Andric /// \param __a 4310b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4320b57cec5SDimitry Andric /// \param __b 4330b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4340b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
43581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, 43681ad6265SDimitry Andric __m128d __b) { 4370b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b); 4380b57cec5SDimitry Andric } 4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4410b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4420fca6ea1SDimitry Andric /// operand are less than those in the second operand. 4430fca6ea1SDimitry Andric /// 4440fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4450fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 4460b57cec5SDimitry Andric /// 4470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4480b57cec5SDimitry Andric /// 4490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4500b57cec5SDimitry Andric /// 4510b57cec5SDimitry Andric /// \param __a 4520b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4530b57cec5SDimitry Andric /// \param __b 4540b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4550b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 45681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, 45781ad6265SDimitry Andric __m128d __b) { 4580b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b); 4590b57cec5SDimitry Andric } 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4620b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4630b57cec5SDimitry Andric /// operand are less than or equal to those in the second operand. 
4640b57cec5SDimitry Andric /// 4650fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4660fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 4670b57cec5SDimitry Andric /// 4680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4690b57cec5SDimitry Andric /// 4700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 4710b57cec5SDimitry Andric /// 4720b57cec5SDimitry Andric /// \param __a 4730b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4740b57cec5SDimitry Andric /// \param __b 4750b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4760b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 47781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, 47881ad6265SDimitry Andric __m128d __b) { 4790b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b); 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 4830b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 4840b57cec5SDimitry Andric /// operand are greater than those in the second operand. 4850b57cec5SDimitry Andric /// 4860fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 4870fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 4880b57cec5SDimitry Andric /// 4890b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 4900b57cec5SDimitry Andric /// 4910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction. 4920b57cec5SDimitry Andric /// 4930b57cec5SDimitry Andric /// \param __a 4940b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
4950b57cec5SDimitry Andric /// \param __b 4960b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 4970b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 49881ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, 49981ad6265SDimitry Andric __m128d __b) { 5000b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a); 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5040b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5050b57cec5SDimitry Andric /// operand are greater than or equal to those in the second operand. 5060b57cec5SDimitry Andric /// 5070fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5080fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 5090b57cec5SDimitry Andric /// 5100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5110b57cec5SDimitry Andric /// 5120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction. 5130b57cec5SDimitry Andric /// 5140b57cec5SDimitry Andric /// \param __a 5150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5160b57cec5SDimitry Andric /// \param __b 5170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5180b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
51981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, 52081ad6265SDimitry Andric __m128d __b) { 5210b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a); 5220b57cec5SDimitry Andric } 5230b57cec5SDimitry Andric 5240b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5250b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5260b57cec5SDimitry Andric /// operand are ordered with respect to those in the second operand. 5270b57cec5SDimitry Andric /// 5280fca6ea1SDimitry Andric /// A pair of double-precision values are ordered with respect to each 5290fca6ea1SDimitry Andric /// other if neither value is a NaN. Each comparison returns 0x0 for false, 5300b57cec5SDimitry Andric /// 0xFFFFFFFFFFFFFFFF for true. 5310b57cec5SDimitry Andric /// 5320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5330b57cec5SDimitry Andric /// 5340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction. 5350b57cec5SDimitry Andric /// 5360b57cec5SDimitry Andric /// \param __a 5370b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5380b57cec5SDimitry Andric /// \param __b 5390b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5400b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
54181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, 54281ad6265SDimitry Andric __m128d __b) { 5430b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b); 5440b57cec5SDimitry Andric } 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5470b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5480b57cec5SDimitry Andric /// operand are unordered with respect to those in the second operand. 5490b57cec5SDimitry Andric /// 5500fca6ea1SDimitry Andric /// A pair of double-precision values are unordered with respect to each 5510fca6ea1SDimitry Andric /// other if one or both values are NaN. Each comparison returns 0x0 for 5520b57cec5SDimitry Andric /// false, 0xFFFFFFFFFFFFFFFF for true. 5530b57cec5SDimitry Andric /// 5540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5550b57cec5SDimitry Andric /// 5560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c> 5570b57cec5SDimitry Andric /// instruction. 5580b57cec5SDimitry Andric /// 5590b57cec5SDimitry Andric /// \param __a 5600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5610b57cec5SDimitry Andric /// \param __b 5620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5630b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
56481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, 56581ad6265SDimitry Andric __m128d __b) { 5660b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b); 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5700b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5710b57cec5SDimitry Andric /// operand are unequal to those in the second operand. 5720b57cec5SDimitry Andric /// 5730fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5740fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 5750b57cec5SDimitry Andric /// 5760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5770b57cec5SDimitry Andric /// 5780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction. 5790b57cec5SDimitry Andric /// 5800b57cec5SDimitry Andric /// \param __a 5810b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5820b57cec5SDimitry Andric /// \param __b 5830b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 5840b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 58581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, 58681ad6265SDimitry Andric __m128d __b) { 5870b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b); 5880b57cec5SDimitry Andric } 5890b57cec5SDimitry Andric 5900b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 5910b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 5920b57cec5SDimitry Andric /// operand are not less than those in the second operand. 
5930b57cec5SDimitry Andric /// 5940fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 5950fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 5960b57cec5SDimitry Andric /// 5970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5980b57cec5SDimitry Andric /// 5990b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 6000b57cec5SDimitry Andric /// 6010b57cec5SDimitry Andric /// \param __a 6020b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6030b57cec5SDimitry Andric /// \param __b 6040b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6050b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 60681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, 60781ad6265SDimitry Andric __m128d __b) { 6080b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b); 6090b57cec5SDimitry Andric } 6100b57cec5SDimitry Andric 6110b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6120b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6130b57cec5SDimitry Andric /// operand are not less than or equal to those in the second operand. 6140b57cec5SDimitry Andric /// 6150fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6160fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 6170b57cec5SDimitry Andric /// 6180b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6190b57cec5SDimitry Andric /// 6200b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 6210b57cec5SDimitry Andric /// 6220b57cec5SDimitry Andric /// \param __a 6230b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
6240b57cec5SDimitry Andric /// \param __b 6250b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6260b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 62781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, 62881ad6265SDimitry Andric __m128d __b) { 6290b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b); 6300b57cec5SDimitry Andric } 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6330b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6340b57cec5SDimitry Andric /// operand are not greater than those in the second operand. 6350b57cec5SDimitry Andric /// 6360fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6370fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 6380b57cec5SDimitry Andric /// 6390b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6400b57cec5SDimitry Andric /// 6410b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction. 6420b57cec5SDimitry Andric /// 6430b57cec5SDimitry Andric /// \param __a 6440b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6450b57cec5SDimitry Andric /// \param __b 6460b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6470b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 
64881ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, 64981ad6265SDimitry Andric __m128d __b) { 6500b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a); 6510b57cec5SDimitry Andric } 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric /// Compares each of the corresponding double-precision values of the 6540b57cec5SDimitry Andric /// 128-bit vectors of [2 x double] to determine if the values in the first 6550b57cec5SDimitry Andric /// operand are not greater than or equal to those in the second operand. 6560b57cec5SDimitry Andric /// 6570fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 6580fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 6590b57cec5SDimitry Andric /// 6600b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6610b57cec5SDimitry Andric /// 6620b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction. 6630b57cec5SDimitry Andric /// 6640b57cec5SDimitry Andric /// \param __a 6650b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6660b57cec5SDimitry Andric /// \param __b 6670b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 6680b57cec5SDimitry Andric /// \returns A 128-bit vector containing the comparison results. 66981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, 67081ad6265SDimitry Andric __m128d __b) { 6710b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a); 6720b57cec5SDimitry Andric } 6730b57cec5SDimitry Andric 6740b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6750b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 6760b57cec5SDimitry Andric /// 6770fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 
6780fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 6790b57cec5SDimitry Andric /// 6800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6810b57cec5SDimitry Andric /// 6820b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction. 6830b57cec5SDimitry Andric /// 6840b57cec5SDimitry Andric /// \param __a 6850b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6860b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 6870b57cec5SDimitry Andric /// \param __b 6880b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 6890b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 6900b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 6910b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 69281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, 69381ad6265SDimitry Andric __m128d __b) { 6940b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b); 6950b57cec5SDimitry Andric } 6960b57cec5SDimitry Andric 6970b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 6980b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 6990b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 7000b57cec5SDimitry Andric /// the second parameter. 7010b57cec5SDimitry Andric /// 7020fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7030fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 
7040b57cec5SDimitry Andric /// 7050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7060b57cec5SDimitry Andric /// 7070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 7080b57cec5SDimitry Andric /// 7090b57cec5SDimitry Andric /// \param __a 7100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7110b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7120b57cec5SDimitry Andric /// \param __b 7130b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7140b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7150b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7160b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 71781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, 71881ad6265SDimitry Andric __m128d __b) { 7190b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b); 7200b57cec5SDimitry Andric } 7210b57cec5SDimitry Andric 7220b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7230b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7240b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 7250b57cec5SDimitry Andric /// corresponding value in the second parameter. 7260b57cec5SDimitry Andric /// 7270fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7280fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 
7290b57cec5SDimitry Andric /// 7300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7310b57cec5SDimitry Andric /// 7320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7330b57cec5SDimitry Andric /// 7340b57cec5SDimitry Andric /// \param __a 7350b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7360b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7370b57cec5SDimitry Andric /// \param __b 7380b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7390b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7400b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7410b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 74281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, 74381ad6265SDimitry Andric __m128d __b) { 7440b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b); 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric 7470b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7480b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7490b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 7500b57cec5SDimitry Andric /// in the second parameter. 7510b57cec5SDimitry Andric /// 7520fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7530fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 
7540b57cec5SDimitry Andric /// 7550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7560b57cec5SDimitry Andric /// 7570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction. 7580b57cec5SDimitry Andric /// 7590b57cec5SDimitry Andric /// \param __a 7600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7610b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7620b57cec5SDimitry Andric /// \param __b 7630b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7640b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7650b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7660b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 76781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, 76881ad6265SDimitry Andric __m128d __b) { 7690b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a); 7700b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 7710b57cec5SDimitry Andric } 7720b57cec5SDimitry Andric 7730b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 7740b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 7750b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 7760b57cec5SDimitry Andric /// corresponding value in the second parameter. 7770b57cec5SDimitry Andric /// 7780fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 7790fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns false. 
7800b57cec5SDimitry Andric /// 7810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 7820b57cec5SDimitry Andric /// 7830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction. 7840b57cec5SDimitry Andric /// 7850b57cec5SDimitry Andric /// \param __a 7860b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7870b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 7880b57cec5SDimitry Andric /// \param __b 7890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 7900b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 7910b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 7920b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 79381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, 79481ad6265SDimitry Andric __m128d __b) { 7950b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a); 7960b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 7970b57cec5SDimitry Andric } 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8000b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8010fca6ea1SDimitry Andric /// the value in the first parameter is ordered with respect to the 8020b57cec5SDimitry Andric /// corresponding value in the second parameter. 8030b57cec5SDimitry Andric /// 8040fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 8050fca6ea1SDimitry Andric /// of double-precision values are ordered with respect to each other if 8060b57cec5SDimitry Andric /// neither value is a NaN. 
8070b57cec5SDimitry Andric /// 8080b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8090b57cec5SDimitry Andric /// 8100b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction. 8110b57cec5SDimitry Andric /// 8120b57cec5SDimitry Andric /// \param __a 8130b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8140b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8150b57cec5SDimitry Andric /// \param __b 8160b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8170b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8180b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8190b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 82081ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, 82181ad6265SDimitry Andric __m128d __b) { 8220b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b); 8230b57cec5SDimitry Andric } 8240b57cec5SDimitry Andric 8250b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8260b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8270fca6ea1SDimitry Andric /// the value in the first parameter is unordered with respect to the 8280b57cec5SDimitry Andric /// corresponding value in the second parameter. 8290b57cec5SDimitry Andric /// 8300fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair 8310fca6ea1SDimitry Andric /// of double-precision values are unordered with respect to each other if 8320b57cec5SDimitry Andric /// one or both values are NaN. 
8330b57cec5SDimitry Andric /// 8340b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8350b57cec5SDimitry Andric /// 8360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c> 8370b57cec5SDimitry Andric /// instruction. 8380b57cec5SDimitry Andric /// 8390b57cec5SDimitry Andric /// \param __a 8400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8410b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8420b57cec5SDimitry Andric /// \param __b 8430b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8440b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8450b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8460b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 84781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, 84881ad6265SDimitry Andric __m128d __b) { 8490b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b); 8500b57cec5SDimitry Andric } 8510b57cec5SDimitry Andric 8520b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8530b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8540b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 8550b57cec5SDimitry Andric /// the second parameter. 8560b57cec5SDimitry Andric /// 8570fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8580fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 
8590b57cec5SDimitry Andric /// 8600b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8610b57cec5SDimitry Andric /// 8620b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction. 8630b57cec5SDimitry Andric /// 8640b57cec5SDimitry Andric /// \param __a 8650b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8660b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8670b57cec5SDimitry Andric /// \param __b 8680b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8690b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8700b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8710b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 87281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, 87381ad6265SDimitry Andric __m128d __b) { 8740b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b); 8750b57cec5SDimitry Andric } 8760b57cec5SDimitry Andric 8770b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 8780b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 8790b57cec5SDimitry Andric /// the value in the first parameter is not less than the corresponding 8800b57cec5SDimitry Andric /// value in the second parameter. 8810b57cec5SDimitry Andric /// 8820fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 8830fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 
8840b57cec5SDimitry Andric /// 8850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 8860b57cec5SDimitry Andric /// 8870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 8880b57cec5SDimitry Andric /// 8890b57cec5SDimitry Andric /// \param __a 8900b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8910b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 8920b57cec5SDimitry Andric /// \param __b 8930b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 8940b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 8950b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 8960b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 89781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, 89881ad6265SDimitry Andric __m128d __b) { 8990b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b); 9000b57cec5SDimitry Andric } 9010b57cec5SDimitry Andric 9020b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9030b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9040b57cec5SDimitry Andric /// the value in the first parameter is not less than or equal to the 9050b57cec5SDimitry Andric /// corresponding value in the second parameter. 9060b57cec5SDimitry Andric /// 9070fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9080fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 
9090b57cec5SDimitry Andric /// 9100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9110b57cec5SDimitry Andric /// 9120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 9130b57cec5SDimitry Andric /// 9140b57cec5SDimitry Andric /// \param __a 9150b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9160b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9170b57cec5SDimitry Andric /// \param __b 9180b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9190b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9200b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9210b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 92281ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, 92381ad6265SDimitry Andric __m128d __b) { 9240b57cec5SDimitry Andric return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b); 9250b57cec5SDimitry Andric } 9260b57cec5SDimitry Andric 9270b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9280b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9290b57cec5SDimitry Andric /// the value in the first parameter is not greater than the corresponding 9300b57cec5SDimitry Andric /// value in the second parameter. 9310b57cec5SDimitry Andric /// 9320fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9330fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 
9340b57cec5SDimitry Andric /// 9350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9360b57cec5SDimitry Andric /// 9370b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction. 9380b57cec5SDimitry Andric /// 9390b57cec5SDimitry Andric /// \param __a 9400b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9410b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9420b57cec5SDimitry Andric /// \param __b 9430b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9440b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9450b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9460b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 94781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, 94881ad6265SDimitry Andric __m128d __b) { 9490b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a); 9500b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 9510b57cec5SDimitry Andric } 9520b57cec5SDimitry Andric 9530b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9540b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 9550b57cec5SDimitry Andric /// the value in the first parameter is not greater than or equal to the 9560b57cec5SDimitry Andric /// corresponding value in the second parameter. 9570b57cec5SDimitry Andric /// 9580fca6ea1SDimitry Andric /// The comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 9590fca6ea1SDimitry Andric /// If either value in a comparison is NaN, returns true. 
9600b57cec5SDimitry Andric /// 9610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9620b57cec5SDimitry Andric /// 9630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction. 9640b57cec5SDimitry Andric /// 9650b57cec5SDimitry Andric /// \param __a 9660b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9670b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9680b57cec5SDimitry Andric /// \param __b 9690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9700b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9710b57cec5SDimitry Andric /// \returns A 128-bit vector. The lower 64 bits contains the comparison 9720b57cec5SDimitry Andric /// results. The upper 64 bits are copied from the upper 64 bits of \a __a. 97381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, 97481ad6265SDimitry Andric __m128d __b) { 9750b57cec5SDimitry Andric __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a); 9760b57cec5SDimitry Andric return __extension__(__m128d){__c[0], __a[1]}; 9770b57cec5SDimitry Andric } 9780b57cec5SDimitry Andric 9790b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 9800b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 9810b57cec5SDimitry Andric /// 9820fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 9830fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 9840b57cec5SDimitry Andric /// 9850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 9860b57cec5SDimitry Andric /// 9870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 
9880b57cec5SDimitry Andric /// 9890b57cec5SDimitry Andric /// \param __a 9900b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9910b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 9920b57cec5SDimitry Andric /// \param __b 9930b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 9940b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 9950fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 99681ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, 99781ad6265SDimitry Andric __m128d __b) { 9980b57cec5SDimitry Andric return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b); 9990b57cec5SDimitry Andric } 10000b57cec5SDimitry Andric 10010b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10020b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10030b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 10040b57cec5SDimitry Andric /// the second parameter. 10050b57cec5SDimitry Andric /// 10060fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 10070fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 10080b57cec5SDimitry Andric /// 10090b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10100b57cec5SDimitry Andric /// 10110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10120b57cec5SDimitry Andric /// 10130b57cec5SDimitry Andric /// \param __a 10140b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10150b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 
10160b57cec5SDimitry Andric /// \param __b 10170b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10180b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10190fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 102081ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, 102181ad6265SDimitry Andric __m128d __b) { 10220b57cec5SDimitry Andric return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b); 10230b57cec5SDimitry Andric } 10240b57cec5SDimitry Andric 10250b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10260b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10270b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 10280b57cec5SDimitry Andric /// corresponding value in the second parameter. 10290b57cec5SDimitry Andric /// 10300fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 10310fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 10320b57cec5SDimitry Andric /// 10330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10340b57cec5SDimitry Andric /// 10350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10360b57cec5SDimitry Andric /// 10370b57cec5SDimitry Andric /// \param __a 10380b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10390b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10400b57cec5SDimitry Andric /// \param __b 10410b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10420b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10430fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 
104481ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, 104581ad6265SDimitry Andric __m128d __b) { 10460b57cec5SDimitry Andric return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b); 10470b57cec5SDimitry Andric } 10480b57cec5SDimitry Andric 10490b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10500b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10510b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 10520b57cec5SDimitry Andric /// in the second parameter. 10530b57cec5SDimitry Andric /// 10540fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 10550fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 10560b57cec5SDimitry Andric /// 10570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10580b57cec5SDimitry Andric /// 10590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10600b57cec5SDimitry Andric /// 10610b57cec5SDimitry Andric /// \param __a 10620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10630b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10640b57cec5SDimitry Andric /// \param __b 10650b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10660b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10670fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 
106881ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, 106981ad6265SDimitry Andric __m128d __b) { 10700b57cec5SDimitry Andric return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b); 10710b57cec5SDimitry Andric } 10720b57cec5SDimitry Andric 10730b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10740b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10750b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 10760b57cec5SDimitry Andric /// corresponding value in the second parameter. 10770b57cec5SDimitry Andric /// 10780fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 10790fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 10800b57cec5SDimitry Andric /// 10810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 10820b57cec5SDimitry Andric /// 10830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 10840b57cec5SDimitry Andric /// 10850b57cec5SDimitry Andric /// \param __a 10860b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10870b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 10880b57cec5SDimitry Andric /// \param __b 10890b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 10900b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 10910fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 
109281ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, 109381ad6265SDimitry Andric __m128d __b) { 10940b57cec5SDimitry Andric return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b); 10950b57cec5SDimitry Andric } 10960b57cec5SDimitry Andric 10970b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 10980b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 10990b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 11000b57cec5SDimitry Andric /// the second parameter. 11010b57cec5SDimitry Andric /// 11020fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 11030fca6ea1SDimitry Andric /// comparison is NaN, returns 1. 11040b57cec5SDimitry Andric /// 11050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11060b57cec5SDimitry Andric /// 11070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction. 11080b57cec5SDimitry Andric /// 11090b57cec5SDimitry Andric /// \param __a 11100b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11110b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11120b57cec5SDimitry Andric /// \param __b 11130b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11140b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11150fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 
111681ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, 111781ad6265SDimitry Andric __m128d __b) { 11180b57cec5SDimitry Andric return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b); 11190b57cec5SDimitry Andric } 11200b57cec5SDimitry Andric 11210b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11220fca6ea1SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] for equality. 11230b57cec5SDimitry Andric /// 11240fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 11250fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 11260b57cec5SDimitry Andric /// 11270b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11280b57cec5SDimitry Andric /// 11290b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11300b57cec5SDimitry Andric /// 11310b57cec5SDimitry Andric /// \param __a 11320b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11330b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11340b57cec5SDimitry Andric /// \param __b 11350b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11360b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11370fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 
113881ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, 113981ad6265SDimitry Andric __m128d __b) { 11400b57cec5SDimitry Andric return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b); 11410b57cec5SDimitry Andric } 11420b57cec5SDimitry Andric 11430b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11440b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11450b57cec5SDimitry Andric /// the value in the first parameter is less than the corresponding value in 11460b57cec5SDimitry Andric /// the second parameter. 11470b57cec5SDimitry Andric /// 11480fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 11490fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 11500b57cec5SDimitry Andric /// 11510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11520b57cec5SDimitry Andric /// 11530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11540b57cec5SDimitry Andric /// 11550b57cec5SDimitry Andric /// \param __a 11560b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11570b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11580b57cec5SDimitry Andric /// \param __b 11590b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11600b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11610fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 
116281ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, 116381ad6265SDimitry Andric __m128d __b) { 11640b57cec5SDimitry Andric return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b); 11650b57cec5SDimitry Andric } 11660b57cec5SDimitry Andric 11670b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11680b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11690b57cec5SDimitry Andric /// the value in the first parameter is less than or equal to the 11700b57cec5SDimitry Andric /// corresponding value in the second parameter. 11710b57cec5SDimitry Andric /// 11720fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 11730fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 11740b57cec5SDimitry Andric /// 11750b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 11760b57cec5SDimitry Andric /// 11770b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 11780b57cec5SDimitry Andric /// 11790b57cec5SDimitry Andric /// \param __a 11800b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11810b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 11820b57cec5SDimitry Andric /// \param __b 11830b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 11840b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 11850fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 
118681ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, 118781ad6265SDimitry Andric __m128d __b) { 11880b57cec5SDimitry Andric return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b); 11890b57cec5SDimitry Andric } 11900b57cec5SDimitry Andric 11910b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 11920b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 11930b57cec5SDimitry Andric /// the value in the first parameter is greater than the corresponding value 11940b57cec5SDimitry Andric /// in the second parameter. 11950b57cec5SDimitry Andric /// 11960fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 11970fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 11980b57cec5SDimitry Andric /// 11990b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12000b57cec5SDimitry Andric /// 12010b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12020b57cec5SDimitry Andric /// 12030b57cec5SDimitry Andric /// \param __a 12040b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12050b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12060b57cec5SDimitry Andric /// \param __b 12070b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12080b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12090fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 
121081ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, 121181ad6265SDimitry Andric __m128d __b) { 12120b57cec5SDimitry Andric return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b); 12130b57cec5SDimitry Andric } 12140b57cec5SDimitry Andric 12150b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12160b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12170b57cec5SDimitry Andric /// the value in the first parameter is greater than or equal to the 12180b57cec5SDimitry Andric /// corresponding value in the second parameter. 12190b57cec5SDimitry Andric /// 12200fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 12210fca6ea1SDimitry Andric /// comparison is NaN, returns 0. 12220b57cec5SDimitry Andric /// 12230b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12240b57cec5SDimitry Andric /// 12250b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12260b57cec5SDimitry Andric /// 12270b57cec5SDimitry Andric /// \param __a 12280b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12290b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12300b57cec5SDimitry Andric /// \param __b 12310b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12320b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12330fca6ea1SDimitry Andric /// \returns An integer containing the comparison results. 
123481ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, 123581ad6265SDimitry Andric __m128d __b) { 12360b57cec5SDimitry Andric return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b); 12370b57cec5SDimitry Andric } 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric /// Compares the lower double-precision floating-point values in each of 12400b57cec5SDimitry Andric /// the two 128-bit floating-point vectors of [2 x double] to determine if 12410b57cec5SDimitry Andric /// the value in the first parameter is unequal to the corresponding value in 12420b57cec5SDimitry Andric /// the second parameter. 12430b57cec5SDimitry Andric /// 12440fca6ea1SDimitry Andric /// The comparison returns 0 for false, 1 for true. If either value in a 12450fca6ea1SDimitry Andric /// comparison is NaN, returns 1. 12460b57cec5SDimitry Andric /// 12470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12480b57cec5SDimitry Andric /// 12490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction. 12500b57cec5SDimitry Andric /// 12510b57cec5SDimitry Andric /// \param __a 12520b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12530b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __b. 12540b57cec5SDimitry Andric /// \param __b 12550b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision value is 12560b57cec5SDimitry Andric /// compared to the lower double-precision value of \a __a. 12570fca6ea1SDimitry Andric /// \returns An integer containing the comparison result. 
125881ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, 125981ad6265SDimitry Andric __m128d __b) { 12600b57cec5SDimitry Andric return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b); 12610b57cec5SDimitry Andric } 12620b57cec5SDimitry Andric 12630b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 12640b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two single-precision floating-point 12650b57cec5SDimitry Andric /// values, returned in the lower 64 bits of a 128-bit vector of [4 x float]. 12660b57cec5SDimitry Andric /// The upper 64 bits of the result vector are set to zero. 12670b57cec5SDimitry Andric /// 12680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12690b57cec5SDimitry Andric /// 12700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction. 12710b57cec5SDimitry Andric /// 12720b57cec5SDimitry Andric /// \param __a 12730b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 12740b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the 12750b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 127681ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { 12770b57cec5SDimitry Andric return __builtin_ia32_cvtpd2ps((__v2df)__a); 12780b57cec5SDimitry Andric } 12790b57cec5SDimitry Andric 12800b57cec5SDimitry Andric /// Converts the lower two single-precision floating-point elements of a 12810b57cec5SDimitry Andric /// 128-bit vector of [4 x float] into two double-precision floating-point 12820b57cec5SDimitry Andric /// values, returned in a 128-bit vector of [2 x double]. The upper two 12830b57cec5SDimitry Andric /// elements of the input vector are unused. 
12840b57cec5SDimitry Andric /// 12850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 12860b57cec5SDimitry Andric /// 12870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction. 12880b57cec5SDimitry Andric /// 12890b57cec5SDimitry Andric /// \param __a 12900b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower two single-precision 12910b57cec5SDimitry Andric /// floating-point elements are converted to double-precision values. The 12920b57cec5SDimitry Andric /// upper two elements are unused. 12930b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 129481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { 12950b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 12960b57cec5SDimitry Andric __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); 12970b57cec5SDimitry Andric } 12980b57cec5SDimitry Andric 12990b57cec5SDimitry Andric /// Converts the lower two integer elements of a 128-bit vector of 13000b57cec5SDimitry Andric /// [4 x i32] into two double-precision floating-point values, returned in a 13010b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 13020b57cec5SDimitry Andric /// 13030b57cec5SDimitry Andric /// The upper two elements of the input vector are unused. 13040b57cec5SDimitry Andric /// 13050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13060b57cec5SDimitry Andric /// 13070b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction. 13080b57cec5SDimitry Andric /// 13090b57cec5SDimitry Andric /// \param __a 13100b57cec5SDimitry Andric /// A 128-bit integer vector of [4 x i32]. The lower two integer elements are 13110b57cec5SDimitry Andric /// converted to double-precision values. 13120b57cec5SDimitry Andric /// 13130b57cec5SDimitry Andric /// The upper two elements are unused. 
13140b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 131581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { 13160b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 13170b57cec5SDimitry Andric __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); 13180b57cec5SDimitry Andric } 13190b57cec5SDimitry Andric 13200b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 13210b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 13220b57cec5SDimitry Andric /// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper 13230b57cec5SDimitry Andric /// 64 bits of the result vector are set to zero. 13240b57cec5SDimitry Andric /// 13250fca6ea1SDimitry Andric /// If a converted value does not fit in a 32-bit integer, raises a 13260fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 13270fca6ea1SDimitry Andric /// the most negative integer. 13280fca6ea1SDimitry Andric /// 13290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13300b57cec5SDimitry Andric /// 13310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction. 13320b57cec5SDimitry Andric /// 13330b57cec5SDimitry Andric /// \param __a 13340b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 13350b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 13360b57cec5SDimitry Andric /// converted values. The upper 64 bits are set to zero. 
133781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a) { 13380b57cec5SDimitry Andric return __builtin_ia32_cvtpd2dq((__v2df)__a); 13390b57cec5SDimitry Andric } 13400b57cec5SDimitry Andric 13410b57cec5SDimitry Andric /// Converts the low-order element of a 128-bit vector of [2 x double] 13420b57cec5SDimitry Andric /// into a 32-bit signed integer value. 13430b57cec5SDimitry Andric /// 13440fca6ea1SDimitry Andric /// If the converted value does not fit in a 32-bit integer, raises a 13450fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 13460fca6ea1SDimitry Andric /// the most negative integer. 13470fca6ea1SDimitry Andric /// 13480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13490b57cec5SDimitry Andric /// 13500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 13510b57cec5SDimitry Andric /// 13520b57cec5SDimitry Andric /// \param __a 13530b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 13540b57cec5SDimitry Andric /// conversion. 13550b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 135681ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) { 13570b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si((__v2df)__a); 13580b57cec5SDimitry Andric } 13590b57cec5SDimitry Andric 13600b57cec5SDimitry Andric /// Converts the lower double-precision floating-point element of a 13610b57cec5SDimitry Andric /// 128-bit vector of [2 x double], in the second parameter, into a 13620b57cec5SDimitry Andric /// single-precision floating-point value, returned in the lower 32 bits of a 13630b57cec5SDimitry Andric /// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are 13640b57cec5SDimitry Andric /// copied from the upper 96 bits of the first parameter. 
13650b57cec5SDimitry Andric /// 13660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13670b57cec5SDimitry Andric /// 13680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction. 13690b57cec5SDimitry Andric /// 13700b57cec5SDimitry Andric /// \param __a 13710b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are 13720b57cec5SDimitry Andric /// copied to the upper 96 bits of the result. 13730b57cec5SDimitry Andric /// \param __b 13740b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower double-precision 13750b57cec5SDimitry Andric /// floating-point element is used in the conversion. 13760b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the 13770b57cec5SDimitry Andric /// converted value from the second parameter. The upper 96 bits are copied 13780b57cec5SDimitry Andric /// from the upper 96 bits of the first parameter. 137981ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, 138081ad6265SDimitry Andric __m128d __b) { 13810b57cec5SDimitry Andric return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); 13820b57cec5SDimitry Andric } 13830b57cec5SDimitry Andric 13840b57cec5SDimitry Andric /// Converts a 32-bit signed integer value, in the second parameter, into 13850b57cec5SDimitry Andric /// a double-precision floating-point value, returned in the lower 64 bits of 13860b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 13870b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 13880b57cec5SDimitry Andric /// 13890b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 13900b57cec5SDimitry Andric /// 13910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 
13920b57cec5SDimitry Andric /// 13930b57cec5SDimitry Andric /// \param __a 13940b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 13950b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 13960b57cec5SDimitry Andric /// \param __b 13970b57cec5SDimitry Andric /// A 32-bit signed integer containing the value to be converted. 13980b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 13990b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 14000b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 140181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, 140281ad6265SDimitry Andric int __b) { 14030b57cec5SDimitry Andric __a[0] = __b; 14040b57cec5SDimitry Andric return __a; 14050b57cec5SDimitry Andric } 14060b57cec5SDimitry Andric 14070b57cec5SDimitry Andric /// Converts the lower single-precision floating-point element of a 14080b57cec5SDimitry Andric /// 128-bit vector of [4 x float], in the second parameter, into a 14090b57cec5SDimitry Andric /// double-precision floating-point value, returned in the lower 64 bits of 14100b57cec5SDimitry Andric /// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector 14110b57cec5SDimitry Andric /// are copied from the upper 64 bits of the first parameter. 14120b57cec5SDimitry Andric /// 14130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14140b57cec5SDimitry Andric /// 14150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction. 14160b57cec5SDimitry Andric /// 14170b57cec5SDimitry Andric /// \param __a 14180b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are 14190b57cec5SDimitry Andric /// copied to the upper 64 bits of the result. 
14200b57cec5SDimitry Andric /// \param __b 14210b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. The lower single-precision 14220b57cec5SDimitry Andric /// floating-point element is used in the conversion. 14230b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the 14240b57cec5SDimitry Andric /// converted value from the second parameter. The upper 64 bits are copied 14250b57cec5SDimitry Andric /// from the upper 64 bits of the first parameter. 142681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, 142781ad6265SDimitry Andric __m128 __b) { 14280b57cec5SDimitry Andric __a[0] = __b[0]; 14290b57cec5SDimitry Andric return __a; 14300b57cec5SDimitry Andric } 14310b57cec5SDimitry Andric 14320b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14330fca6ea1SDimitry Andric /// 128-bit vector of [2 x double] into two signed truncated (rounded 14340fca6ea1SDimitry Andric /// toward zero) 32-bit integer values, returned in the lower 64 bits 14350fca6ea1SDimitry Andric /// of a 128-bit vector of [4 x i32]. 14360b57cec5SDimitry Andric /// 14370fca6ea1SDimitry Andric /// If a converted value does not fit in a 32-bit integer, raises a 14380fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 14390fca6ea1SDimitry Andric /// the most negative integer. 14400b57cec5SDimitry Andric /// 14410b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14420b57cec5SDimitry Andric /// 14430b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c> 14440b57cec5SDimitry Andric /// instruction. 14450b57cec5SDimitry Andric /// 14460b57cec5SDimitry Andric /// \param __a 14470b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14480b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the 14490b57cec5SDimitry Andric /// converted values. 
The upper 64 bits are set to zero. 145081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a) { 14510b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a); 14520b57cec5SDimitry Andric } 14530b57cec5SDimitry Andric 14540b57cec5SDimitry Andric /// Converts the low-order element of a [2 x double] vector into a 32-bit 14550fca6ea1SDimitry Andric /// signed truncated (rounded toward zero) integer value. 14560fca6ea1SDimitry Andric /// 14570fca6ea1SDimitry Andric /// If the converted value does not fit in a 32-bit integer, raises a 14580fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 14590fca6ea1SDimitry Andric /// the most negative integer. 14600b57cec5SDimitry Andric /// 14610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14620b57cec5SDimitry Andric /// 14630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 14640b57cec5SDimitry Andric /// instruction. 14650b57cec5SDimitry Andric /// 14660b57cec5SDimitry Andric /// \param __a 14670b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 14680b57cec5SDimitry Andric /// conversion. 14690b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the converted value. 147081ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { 14710b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si((__v2df)__a); 14720b57cec5SDimitry Andric } 14730b57cec5SDimitry Andric 14740b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14750b57cec5SDimitry Andric /// 128-bit vector of [2 x double] into two signed 32-bit integer values, 14760b57cec5SDimitry Andric /// returned in a 64-bit vector of [2 x i32]. 
14770b57cec5SDimitry Andric /// 14780fca6ea1SDimitry Andric /// If a converted value does not fit in a 32-bit integer, raises a 14790fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 14800fca6ea1SDimitry Andric /// the most negative integer. 14810fca6ea1SDimitry Andric /// 14820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 14830b57cec5SDimitry Andric /// 14840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction. 14850b57cec5SDimitry Andric /// 14860b57cec5SDimitry Andric /// \param __a 14870b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 14880b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 148981ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) { 14900b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); 14910b57cec5SDimitry Andric } 14920b57cec5SDimitry Andric 14930b57cec5SDimitry Andric /// Converts the two double-precision floating-point elements of a 14940fca6ea1SDimitry Andric /// 128-bit vector of [2 x double] into two signed truncated (rounded toward 14950fca6ea1SDimitry Andric /// zero) 32-bit integer values, returned in a 64-bit vector of [2 x i32]. 14960b57cec5SDimitry Andric /// 14970fca6ea1SDimitry Andric /// If a converted value does not fit in a 32-bit integer, raises a 14980fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 14990fca6ea1SDimitry Andric /// the most negative integer. 15000b57cec5SDimitry Andric /// 15010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15020b57cec5SDimitry Andric /// 15030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction. 15040b57cec5SDimitry Andric /// 15050b57cec5SDimitry Andric /// \param __a 15060b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
15070b57cec5SDimitry Andric /// \returns A 64-bit vector of [2 x i32] containing the converted values. 150881ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) { 15090b57cec5SDimitry Andric return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); 15100b57cec5SDimitry Andric } 15110b57cec5SDimitry Andric 15120b57cec5SDimitry Andric /// Converts the two signed 32-bit integer elements of a 64-bit vector of 15130b57cec5SDimitry Andric /// [2 x i32] into two double-precision floating-point values, returned in a 15140b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 15150b57cec5SDimitry Andric /// 15160b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15170b57cec5SDimitry Andric /// 15180b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction. 15190b57cec5SDimitry Andric /// 15200b57cec5SDimitry Andric /// \param __a 15210b57cec5SDimitry Andric /// A 64-bit vector of [2 x i32]. 15220b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the converted values. 152381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) { 15240b57cec5SDimitry Andric return __builtin_ia32_cvtpi2pd((__v2si)__a); 15250b57cec5SDimitry Andric } 15260b57cec5SDimitry Andric 15270b57cec5SDimitry Andric /// Returns the low-order element of a 128-bit vector of [2 x double] as 15280b57cec5SDimitry Andric /// a double-precision floating-point value. 15290b57cec5SDimitry Andric /// 15300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15310b57cec5SDimitry Andric /// 15320b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 15330b57cec5SDimitry Andric /// 15340b57cec5SDimitry Andric /// \param __a 15350b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are returned. 
15360b57cec5SDimitry Andric /// \returns A double-precision floating-point value copied from the lower 64 15370b57cec5SDimitry Andric /// bits of \a __a. 153881ad6265SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a) { 15390b57cec5SDimitry Andric return __a[0]; 15400b57cec5SDimitry Andric } 15410b57cec5SDimitry Andric 15420b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an aligned 15430b57cec5SDimitry Andric /// memory location. 15440b57cec5SDimitry Andric /// 15450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15460b57cec5SDimitry Andric /// 15470b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction. 15480b57cec5SDimitry Andric /// 15490b57cec5SDimitry Andric /// \param __dp 15500b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 15510b57cec5SDimitry Andric /// location has to be 16-byte aligned. 15520b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 155381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp) { 1554480093f4SDimitry Andric return *(const __m128d *)__dp; 15550b57cec5SDimitry Andric } 15560b57cec5SDimitry Andric 15570b57cec5SDimitry Andric /// Loads a double-precision floating-point value from a specified memory 15580b57cec5SDimitry Andric /// location and duplicates it to both vector elements of a 128-bit vector of 15590b57cec5SDimitry Andric /// [2 x double]. 15600b57cec5SDimitry Andric /// 15610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15620b57cec5SDimitry Andric /// 15630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction. 15640b57cec5SDimitry Andric /// 15650b57cec5SDimitry Andric /// \param __dp 15660b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 
15670b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded and 15680b57cec5SDimitry Andric /// duplicated values. 156981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp) { 15700b57cec5SDimitry Andric struct __mm_load1_pd_struct { 15710b57cec5SDimitry Andric double __u; 15720b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1573480093f4SDimitry Andric double __u = ((const struct __mm_load1_pd_struct *)__dp)->__u; 15740b57cec5SDimitry Andric return __extension__(__m128d){__u, __u}; 15750b57cec5SDimitry Andric } 15760b57cec5SDimitry Andric 15770b57cec5SDimitry Andric #define _mm_load_pd1(dp) _mm_load1_pd(dp) 15780b57cec5SDimitry Andric 15790b57cec5SDimitry Andric /// Loads two double-precision values, in reverse order, from an aligned 15800b57cec5SDimitry Andric /// memory location into a 128-bit vector of [2 x double]. 15810b57cec5SDimitry Andric /// 15820b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 15830b57cec5SDimitry Andric /// 15840b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction + 15850b57cec5SDimitry Andric /// needed shuffling instructions. In AVX mode, the shuffling may be combined 15860b57cec5SDimitry Andric /// with the \c VMOVAPD, resulting in only a \c VPERMILPD instruction. 15870b57cec5SDimitry Andric /// 15880b57cec5SDimitry Andric /// \param __dp 15890b57cec5SDimitry Andric /// A 16-byte aligned pointer to an array of double-precision values to be 15900b57cec5SDimitry Andric /// loaded in reverse order. 15910b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the reversed loaded 15920b57cec5SDimitry Andric /// values. 
159381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp) { 1594480093f4SDimitry Andric __m128d __u = *(const __m128d *)__dp; 15950b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0); 15960b57cec5SDimitry Andric } 15970b57cec5SDimitry Andric 15980b57cec5SDimitry Andric /// Loads a 128-bit floating-point vector of [2 x double] from an 15990b57cec5SDimitry Andric /// unaligned memory location. 16000b57cec5SDimitry Andric /// 16010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16020b57cec5SDimitry Andric /// 16030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 16040b57cec5SDimitry Andric /// 16050b57cec5SDimitry Andric /// \param __dp 16060b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. The address of the memory 16070b57cec5SDimitry Andric /// location does not have to be aligned. 16080b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded values. 160981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp) { 16100b57cec5SDimitry Andric struct __loadu_pd { 16110b57cec5SDimitry Andric __m128d_u __v; 16120b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1613480093f4SDimitry Andric return ((const struct __loadu_pd *)__dp)->__v; 16140b57cec5SDimitry Andric } 16150b57cec5SDimitry Andric 16160b57cec5SDimitry Andric /// Loads a 64-bit integer value to the low element of a 128-bit integer 16170b57cec5SDimitry Andric /// vector and clears the upper element. 16180b57cec5SDimitry Andric /// 16190b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16200b57cec5SDimitry Andric /// 16210b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 16220b57cec5SDimitry Andric /// 16230b57cec5SDimitry Andric /// \param __a 16240b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 
The address of the memory 16250b57cec5SDimitry Andric /// location does not have to be aligned. 16260b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the loaded value. 162781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a) { 16280b57cec5SDimitry Andric struct __loadu_si64 { 16290b57cec5SDimitry Andric long long __v; 16300b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1631480093f4SDimitry Andric long long __u = ((const struct __loadu_si64 *)__a)->__v; 16320b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){__u, 0LL}; 16330b57cec5SDimitry Andric } 16340b57cec5SDimitry Andric 16350b57cec5SDimitry Andric /// Loads a 32-bit integer value to the low element of a 128-bit integer 16360b57cec5SDimitry Andric /// vector and clears the upper element. 16370b57cec5SDimitry Andric /// 16380b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16390b57cec5SDimitry Andric /// 16400b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 16410b57cec5SDimitry Andric /// 16420b57cec5SDimitry Andric /// \param __a 16430b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 16440b57cec5SDimitry Andric /// location does not have to be aligned. 16450b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the loaded value. 
164681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si32(void const *__a) { 16470b57cec5SDimitry Andric struct __loadu_si32 { 16480b57cec5SDimitry Andric int __v; 16490b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1650480093f4SDimitry Andric int __u = ((const struct __loadu_si32 *)__a)->__v; 16510b57cec5SDimitry Andric return __extension__(__m128i)(__v4si){__u, 0, 0, 0}; 16520b57cec5SDimitry Andric } 16530b57cec5SDimitry Andric 16540b57cec5SDimitry Andric /// Loads a 16-bit integer value to the low element of a 128-bit integer 16550b57cec5SDimitry Andric /// vector and clears the upper element. 16560b57cec5SDimitry Andric /// 16570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16580b57cec5SDimitry Andric /// 16590b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 16600b57cec5SDimitry Andric /// 16610b57cec5SDimitry Andric /// \param __a 16620b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 16630b57cec5SDimitry Andric /// location does not have to be aligned. 16640b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the loaded value. 166581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si16(void const *__a) { 16660b57cec5SDimitry Andric struct __loadu_si16 { 16670b57cec5SDimitry Andric short __v; 16680b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1669480093f4SDimitry Andric short __u = ((const struct __loadu_si16 *)__a)->__v; 16700b57cec5SDimitry Andric return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0}; 16710b57cec5SDimitry Andric } 16720b57cec5SDimitry Andric 16730b57cec5SDimitry Andric /// Loads a 64-bit double-precision value to the low element of a 16740b57cec5SDimitry Andric /// 128-bit integer vector and clears the upper element. 
16750b57cec5SDimitry Andric /// 16760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16770b57cec5SDimitry Andric /// 16780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 16790b57cec5SDimitry Andric /// 16800b57cec5SDimitry Andric /// \param __dp 16810b57cec5SDimitry Andric /// A pointer to a memory location containing a double-precision value. 16820b57cec5SDimitry Andric /// The address of the memory location does not have to be aligned. 16830b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the loaded value. 168481ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp) { 16850b57cec5SDimitry Andric struct __mm_load_sd_struct { 16860b57cec5SDimitry Andric double __u; 16870b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1688480093f4SDimitry Andric double __u = ((const struct __mm_load_sd_struct *)__dp)->__u; 16890b57cec5SDimitry Andric return __extension__(__m128d){__u, 0}; 16900b57cec5SDimitry Andric } 16910b57cec5SDimitry Andric 16920b57cec5SDimitry Andric /// Loads a double-precision value into the high-order bits of a 128-bit 16930b57cec5SDimitry Andric /// vector of [2 x double]. The low-order bits are copied from the low-order 16940b57cec5SDimitry Andric /// bits of the first operand. 16950b57cec5SDimitry Andric /// 16960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 16970b57cec5SDimitry Andric /// 16980b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 16990b57cec5SDimitry Andric /// 17000b57cec5SDimitry Andric /// \param __a 17010b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 17020b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the result. 
17030b57cec5SDimitry Andric /// \param __dp 17040b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 17050b57cec5SDimitry Andric /// floating-point value that is loaded. The loaded value is written to bits 17060b57cec5SDimitry Andric /// [127:64] of the result. The address of the memory location does not have 17070b57cec5SDimitry Andric /// to be aligned. 17080b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 170981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, 171081ad6265SDimitry Andric double const *__dp) { 17110b57cec5SDimitry Andric struct __mm_loadh_pd_struct { 17120b57cec5SDimitry Andric double __u; 17130b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1714480093f4SDimitry Andric double __u = ((const struct __mm_loadh_pd_struct *)__dp)->__u; 17150b57cec5SDimitry Andric return __extension__(__m128d){__a[0], __u}; 17160b57cec5SDimitry Andric } 17170b57cec5SDimitry Andric 17180b57cec5SDimitry Andric /// Loads a double-precision value into the low-order bits of a 128-bit 17190b57cec5SDimitry Andric /// vector of [2 x double]. The high-order bits are copied from the 17200b57cec5SDimitry Andric /// high-order bits of the first operand. 17210b57cec5SDimitry Andric /// 17220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17230b57cec5SDimitry Andric /// 17240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 17250b57cec5SDimitry Andric /// 17260b57cec5SDimitry Andric /// \param __a 17270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 17280b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the result. 17290b57cec5SDimitry Andric /// \param __dp 17300b57cec5SDimitry Andric /// A pointer to a 64-bit memory location containing a double-precision 17310b57cec5SDimitry Andric /// floating-point value that is loaded. 
The loaded value is written to bits 17320b57cec5SDimitry Andric /// [63:0] of the result. The address of the memory location does not have to 17330b57cec5SDimitry Andric /// be aligned. 17340b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 173581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, 173681ad6265SDimitry Andric double const *__dp) { 17370b57cec5SDimitry Andric struct __mm_loadl_pd_struct { 17380b57cec5SDimitry Andric double __u; 17390b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1740480093f4SDimitry Andric double __u = ((const struct __mm_loadl_pd_struct *)__dp)->__u; 17410b57cec5SDimitry Andric return __extension__(__m128d){__u, __a[1]}; 17420b57cec5SDimitry Andric } 17430b57cec5SDimitry Andric 17440b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] with 17450b57cec5SDimitry Andric /// unspecified content. This could be used as an argument to another 17460b57cec5SDimitry Andric /// intrinsic function where the argument is required but the value is not 17470b57cec5SDimitry Andric /// actually used. 17480b57cec5SDimitry Andric /// 17490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17500b57cec5SDimitry Andric /// 17510b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 17520b57cec5SDimitry Andric /// 17530b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] with unspecified 17540b57cec5SDimitry Andric /// content. 175581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void) { 17560b57cec5SDimitry Andric return (__m128d)__builtin_ia32_undef128(); 17570b57cec5SDimitry Andric } 17580b57cec5SDimitry Andric 17590b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. 
The lower 17600b57cec5SDimitry Andric /// 64 bits of the vector are initialized with the specified double-precision 17610b57cec5SDimitry Andric /// floating-point value. The upper 64 bits are set to zero. 17620b57cec5SDimitry Andric /// 17630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17640b57cec5SDimitry Andric /// 17650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 17660b57cec5SDimitry Andric /// 17670b57cec5SDimitry Andric /// \param __w 17680b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 17690b57cec5SDimitry Andric /// bits of the result. 17700b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. The 17710b57cec5SDimitry Andric /// lower 64 bits contain the value of the parameter. The upper 64 bits are 17720b57cec5SDimitry Andric /// set to zero. 177381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { 1774*6c4b055cSDimitry Andric return __extension__(__m128d){__w, 0.0}; 17750b57cec5SDimitry Andric } 17760b57cec5SDimitry Andric 17770b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 17780b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 17790b57cec5SDimitry Andric /// specified double-precision floating-point value. 17800b57cec5SDimitry Andric /// 17810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17820b57cec5SDimitry Andric /// 17830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 17840b57cec5SDimitry Andric /// 17850b57cec5SDimitry Andric /// \param __w 17860b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 17870b57cec5SDimitry Andric /// element of the result. 17880b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
178981ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) { 17900b57cec5SDimitry Andric return __extension__(__m128d){__w, __w}; 17910b57cec5SDimitry Andric } 17920b57cec5SDimitry Andric 17930b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], with each 17940b57cec5SDimitry Andric /// of the two double-precision floating-point vector elements set to the 17950b57cec5SDimitry Andric /// specified double-precision floating-point value. 17960b57cec5SDimitry Andric /// 17970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 17980b57cec5SDimitry Andric /// 17990b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction. 18000b57cec5SDimitry Andric /// 18010b57cec5SDimitry Andric /// \param __w 18020b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize each vector 18030b57cec5SDimitry Andric /// element of the result. 18040b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 180581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w) { 18060b57cec5SDimitry Andric return _mm_set1_pd(__w); 18070b57cec5SDimitry Andric } 18080b57cec5SDimitry Andric 18090b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 18100b57cec5SDimitry Andric /// initialized with the specified double-precision floating-point values. 18110b57cec5SDimitry Andric /// 18120b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18130b57cec5SDimitry Andric /// 18140b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 18150b57cec5SDimitry Andric /// 18160b57cec5SDimitry Andric /// \param __w 18170b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 18180b57cec5SDimitry Andric /// bits of the result. 
18190b57cec5SDimitry Andric /// \param __x 18200b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 18210b57cec5SDimitry Andric /// bits of the result. 18220b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 182381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, 182481ad6265SDimitry Andric double __x) { 18250b57cec5SDimitry Andric return __extension__(__m128d){__x, __w}; 18260b57cec5SDimitry Andric } 18270b57cec5SDimitry Andric 18280b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double], 18290b57cec5SDimitry Andric /// initialized in reverse order with the specified double-precision 18300b57cec5SDimitry Andric /// floating-point values. 18310b57cec5SDimitry Andric /// 18320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18330b57cec5SDimitry Andric /// 18340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 18350b57cec5SDimitry Andric /// 18360b57cec5SDimitry Andric /// \param __w 18370b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the lower 64 18380b57cec5SDimitry Andric /// bits of the result. 18390b57cec5SDimitry Andric /// \param __x 18400b57cec5SDimitry Andric /// A double-precision floating-point value used to initialize the upper 64 18410b57cec5SDimitry Andric /// bits of the result. 18420b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double]. 184381ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, 184481ad6265SDimitry Andric double __x) { 18450b57cec5SDimitry Andric return __extension__(__m128d){__w, __x}; 18460b57cec5SDimitry Andric } 18470b57cec5SDimitry Andric 18480b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] 18490b57cec5SDimitry Andric /// initialized to zero. 
18500b57cec5SDimitry Andric /// 18510b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18520b57cec5SDimitry Andric /// 18530b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 18540b57cec5SDimitry Andric /// 18550b57cec5SDimitry Andric /// \returns An initialized 128-bit floating-point vector of [2 x double] with 18560b57cec5SDimitry Andric /// all elements set to zero. 185781ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { 1858bdd1243dSDimitry Andric return __extension__(__m128d){0.0, 0.0}; 18590b57cec5SDimitry Andric } 18600b57cec5SDimitry Andric 18610b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double]. The lower 18620b57cec5SDimitry Andric /// 64 bits are set to the lower 64 bits of the second parameter. The upper 18630b57cec5SDimitry Andric /// 64 bits are set to the upper 64 bits of the first parameter. 18640b57cec5SDimitry Andric /// 18650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18660b57cec5SDimitry Andric /// 18670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction. 18680b57cec5SDimitry Andric /// 18690b57cec5SDimitry Andric /// \param __a 18700b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits are written to the 18710b57cec5SDimitry Andric /// upper 64 bits of the result. 18720b57cec5SDimitry Andric /// \param __b 18730b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are written to the 18740b57cec5SDimitry Andric /// lower 64 bits of the result. 18750b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved values. 
187681ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, 187781ad6265SDimitry Andric __m128d __b) { 18780b57cec5SDimitry Andric __a[0] = __b[0]; 18790b57cec5SDimitry Andric return __a; 18800b57cec5SDimitry Andric } 18810b57cec5SDimitry Andric 18820b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 18830b57cec5SDimitry Andric /// memory location. 18840b57cec5SDimitry Andric /// 18850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 18860b57cec5SDimitry Andric /// 18870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction. 18880b57cec5SDimitry Andric /// 18890b57cec5SDimitry Andric /// \param __dp 18900b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 18910b57cec5SDimitry Andric /// \param __a 18920b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 189381ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, 189481ad6265SDimitry Andric __m128d __a) { 18950b57cec5SDimitry Andric struct __mm_store_sd_struct { 18960b57cec5SDimitry Andric double __u; 18970b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 18980b57cec5SDimitry Andric ((struct __mm_store_sd_struct *)__dp)->__u = __a[0]; 18990b57cec5SDimitry Andric } 19000b57cec5SDimitry Andric 19010b57cec5SDimitry Andric /// Moves packed double-precision values from a 128-bit vector of 19020b57cec5SDimitry Andric /// [2 x double] to a memory location. 19030b57cec5SDimitry Andric /// 19040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19050b57cec5SDimitry Andric /// 19060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction. 
19070b57cec5SDimitry Andric /// 19080b57cec5SDimitry Andric /// \param __dp 19090b57cec5SDimitry Andric /// A pointer to an aligned memory location that can store two 19100b57cec5SDimitry Andric /// double-precision values. 19110b57cec5SDimitry Andric /// \param __a 19120b57cec5SDimitry Andric /// A packed 128-bit vector of [2 x double] containing the values to be 19130b57cec5SDimitry Andric /// moved. 191481ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, 191581ad6265SDimitry Andric __m128d __a) { 19160b57cec5SDimitry Andric *(__m128d *)__dp = __a; 19170b57cec5SDimitry Andric } 19180b57cec5SDimitry Andric 19190b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 19200b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 19210b57cec5SDimitry Andric /// 19220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19230b57cec5SDimitry Andric /// 19240b57cec5SDimitry Andric /// This intrinsic corresponds to the 19250b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 19260b57cec5SDimitry Andric /// 19270b57cec5SDimitry Andric /// \param __dp 19280b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 19290b57cec5SDimitry Andric /// values. 19300b57cec5SDimitry Andric /// \param __a 19310b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 19320b57cec5SDimitry Andric /// of the values in \a __dp. 
193381ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, 193481ad6265SDimitry Andric __m128d __a) { 19350b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); 19360b57cec5SDimitry Andric _mm_store_pd(__dp, __a); 19370b57cec5SDimitry Andric } 19380b57cec5SDimitry Andric 19390b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to 19400b57cec5SDimitry Andric /// the upper and lower 64 bits of a memory location. 19410b57cec5SDimitry Andric /// 19420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19430b57cec5SDimitry Andric /// 19440b57cec5SDimitry Andric /// This intrinsic corresponds to the 19450b57cec5SDimitry Andric /// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction. 19460b57cec5SDimitry Andric /// 19470b57cec5SDimitry Andric /// \param __dp 19480b57cec5SDimitry Andric /// A pointer to a memory location that can store two double-precision 19490b57cec5SDimitry Andric /// values. 19500b57cec5SDimitry Andric /// \param __a 19510b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each 19520b57cec5SDimitry Andric /// of the values in \a __dp. 195381ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, 195481ad6265SDimitry Andric __m128d __a) { 19550b57cec5SDimitry Andric _mm_store1_pd(__dp, __a); 19560b57cec5SDimitry Andric } 19570b57cec5SDimitry Andric 19580b57cec5SDimitry Andric /// Stores a 128-bit vector of [2 x double] into an unaligned memory 19590b57cec5SDimitry Andric /// location. 19600b57cec5SDimitry Andric /// 19610b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19620b57cec5SDimitry Andric /// 19630b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction. 19640b57cec5SDimitry Andric /// 19650b57cec5SDimitry Andric /// \param __dp 19660b57cec5SDimitry Andric /// A pointer to a 128-bit memory location. 
The address of the memory 19670b57cec5SDimitry Andric /// location does not have to be aligned. 19680b57cec5SDimitry Andric /// \param __a 19690b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be stored. 197081ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, 197181ad6265SDimitry Andric __m128d __a) { 19720b57cec5SDimitry Andric struct __storeu_pd { 19730b57cec5SDimitry Andric __m128d_u __v; 19740b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 19750b57cec5SDimitry Andric ((struct __storeu_pd *)__dp)->__v = __a; 19760b57cec5SDimitry Andric } 19770b57cec5SDimitry Andric 19780b57cec5SDimitry Andric /// Stores two double-precision values, in reverse order, from a 128-bit 19790b57cec5SDimitry Andric /// vector of [2 x double] to a 16-byte aligned memory location. 19800b57cec5SDimitry Andric /// 19810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 19820b57cec5SDimitry Andric /// 19830b57cec5SDimitry Andric /// This intrinsic corresponds to a shuffling instruction followed by a 19840b57cec5SDimitry Andric /// <c> VMOVAPD / MOVAPD </c> instruction. 19850b57cec5SDimitry Andric /// 19860b57cec5SDimitry Andric /// \param __dp 19870b57cec5SDimitry Andric /// A pointer to a 16-byte aligned memory location that can store two 19880b57cec5SDimitry Andric /// double-precision values. 19890b57cec5SDimitry Andric /// \param __a 19900b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values to be reversed and 19910b57cec5SDimitry Andric /// stored. 
199281ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, 199381ad6265SDimitry Andric __m128d __a) { 19940b57cec5SDimitry Andric __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0); 19950b57cec5SDimitry Andric *(__m128d *)__dp = __a; 19960b57cec5SDimitry Andric } 19970b57cec5SDimitry Andric 19980b57cec5SDimitry Andric /// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a 19990b57cec5SDimitry Andric /// memory location. 20000b57cec5SDimitry Andric /// 20010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20020b57cec5SDimitry Andric /// 20030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction. 20040b57cec5SDimitry Andric /// 20050b57cec5SDimitry Andric /// \param __dp 20060b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 20070b57cec5SDimitry Andric /// \param __a 20080b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 200981ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, 201081ad6265SDimitry Andric __m128d __a) { 20110b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 20120b57cec5SDimitry Andric double __u; 20130b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 20140b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[1]; 20150b57cec5SDimitry Andric } 20160b57cec5SDimitry Andric 20170b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a 20180b57cec5SDimitry Andric /// memory location. 20190b57cec5SDimitry Andric /// 20200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20210b57cec5SDimitry Andric /// 20220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction. 20230b57cec5SDimitry Andric /// 20240b57cec5SDimitry Andric /// \param __dp 20250b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. 
20260b57cec5SDimitry Andric /// \param __a 20270b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the value to be stored. 202881ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, 202981ad6265SDimitry Andric __m128d __a) { 20300b57cec5SDimitry Andric struct __mm_storeh_pd_struct { 20310b57cec5SDimitry Andric double __u; 20320b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 20330b57cec5SDimitry Andric ((struct __mm_storeh_pd_struct *)__dp)->__u = __a[0]; 20340b57cec5SDimitry Andric } 20350b57cec5SDimitry Andric 20360b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [16 x i8], 20370b57cec5SDimitry Andric /// saving the lower 8 bits of each sum in the corresponding element of a 20380b57cec5SDimitry Andric /// 128-bit result vector of [16 x i8]. 20390b57cec5SDimitry Andric /// 20400b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 20410b57cec5SDimitry Andric /// 20420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20430b57cec5SDimitry Andric /// 20440b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction. 20450b57cec5SDimitry Andric /// 20460b57cec5SDimitry Andric /// \param __a 20470b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 20480b57cec5SDimitry Andric /// \param __b 20490b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 20500b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the sums of both 20510b57cec5SDimitry Andric /// parameters. 
205281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, 205381ad6265SDimitry Andric __m128i __b) { 20540b57cec5SDimitry Andric return (__m128i)((__v16qu)__a + (__v16qu)__b); 20550b57cec5SDimitry Andric } 20560b57cec5SDimitry Andric 20570b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [8 x i16], 20580b57cec5SDimitry Andric /// saving the lower 16 bits of each sum in the corresponding element of a 20590b57cec5SDimitry Andric /// 128-bit result vector of [8 x i16]. 20600b57cec5SDimitry Andric /// 20610b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 20620b57cec5SDimitry Andric /// 20630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20640b57cec5SDimitry Andric /// 20650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction. 20660b57cec5SDimitry Andric /// 20670b57cec5SDimitry Andric /// \param __a 20680b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 20690b57cec5SDimitry Andric /// \param __b 20700b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 20710b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the sums of both 20720b57cec5SDimitry Andric /// parameters. 207381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, 207481ad6265SDimitry Andric __m128i __b) { 20750b57cec5SDimitry Andric return (__m128i)((__v8hu)__a + (__v8hu)__b); 20760b57cec5SDimitry Andric } 20770b57cec5SDimitry Andric 20780b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [4 x i32], 20790b57cec5SDimitry Andric /// saving the lower 32 bits of each sum in the corresponding element of a 20800b57cec5SDimitry Andric /// 128-bit result vector of [4 x i32]. 20810b57cec5SDimitry Andric /// 20820b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 
20830b57cec5SDimitry Andric /// 20840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 20850b57cec5SDimitry Andric /// 20860b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction. 20870b57cec5SDimitry Andric /// 20880b57cec5SDimitry Andric /// \param __a 20890b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 20900b57cec5SDimitry Andric /// \param __b 20910b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. 20920b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the sums of both 20930b57cec5SDimitry Andric /// parameters. 209481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, 209581ad6265SDimitry Andric __m128i __b) { 20960b57cec5SDimitry Andric return (__m128i)((__v4su)__a + (__v4su)__b); 20970b57cec5SDimitry Andric } 20980b57cec5SDimitry Andric 20990b57cec5SDimitry Andric /// Adds two signed or unsigned 64-bit integer values, returning the 21000b57cec5SDimitry Andric /// lower 64 bits of the sum. 21010b57cec5SDimitry Andric /// 21020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21030b57cec5SDimitry Andric /// 21040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PADDQ </c> instruction. 21050b57cec5SDimitry Andric /// 21060b57cec5SDimitry Andric /// \param __a 21070b57cec5SDimitry Andric /// A 64-bit integer. 21080b57cec5SDimitry Andric /// \param __b 21090b57cec5SDimitry Andric /// A 64-bit integer. 21100b57cec5SDimitry Andric /// \returns A 64-bit integer containing the sum of both parameters. 
211181ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, 211281ad6265SDimitry Andric __m64 __b) { 21130b57cec5SDimitry Andric return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); 21140b57cec5SDimitry Andric } 21150b57cec5SDimitry Andric 21160b57cec5SDimitry Andric /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], 21170b57cec5SDimitry Andric /// saving the lower 64 bits of each sum in the corresponding element of a 21180b57cec5SDimitry Andric /// 128-bit result vector of [2 x i64]. 21190b57cec5SDimitry Andric /// 21200b57cec5SDimitry Andric /// The integer elements of both parameters can be either signed or unsigned. 21210b57cec5SDimitry Andric /// 21220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21230b57cec5SDimitry Andric /// 21240b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction. 21250b57cec5SDimitry Andric /// 21260b57cec5SDimitry Andric /// \param __a 21270b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 21280b57cec5SDimitry Andric /// \param __b 21290b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. 21300b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the sums of both 21310b57cec5SDimitry Andric /// parameters. 213281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, 213381ad6265SDimitry Andric __m128i __b) { 21340b57cec5SDimitry Andric return (__m128i)((__v2du)__a + (__v2du)__b); 21350b57cec5SDimitry Andric } 21360b57cec5SDimitry Andric 21370b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21380fca6ea1SDimitry Andric /// signed [16 x i8] vectors, saving each sum in the corresponding element 21390fca6ea1SDimitry Andric /// of a 128-bit result vector of [16 x i8]. 21400fca6ea1SDimitry Andric /// 21410fca6ea1SDimitry Andric /// Positive sums greater than 0x7F are saturated to 0x7F. 
Negative sums 21420fca6ea1SDimitry Andric /// less than 0x80 are saturated to 0x80. 21430b57cec5SDimitry Andric /// 21440b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21450b57cec5SDimitry Andric /// 21460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction. 21470b57cec5SDimitry Andric /// 21480b57cec5SDimitry Andric /// \param __a 21490b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 21500b57cec5SDimitry Andric /// \param __b 21510b57cec5SDimitry Andric /// A 128-bit signed [16 x i8] vector. 21520b57cec5SDimitry Andric /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of 21530b57cec5SDimitry Andric /// both parameters. 215481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, 215581ad6265SDimitry Andric __m128i __b) { 215681ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b); 21570b57cec5SDimitry Andric } 21580b57cec5SDimitry Andric 21590b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21600fca6ea1SDimitry Andric /// signed [8 x i16] vectors, saving each sum in the corresponding element 21610fca6ea1SDimitry Andric /// of a 128-bit result vector of [8 x i16]. 21620fca6ea1SDimitry Andric /// 21630fca6ea1SDimitry Andric /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums 21640fca6ea1SDimitry Andric /// less than 0x8000 are saturated to 0x8000. 21650b57cec5SDimitry Andric /// 21660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21670b57cec5SDimitry Andric /// 21680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction. 21690b57cec5SDimitry Andric /// 21700b57cec5SDimitry Andric /// \param __a 21710b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 21720b57cec5SDimitry Andric /// \param __b 21730b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 
21740b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of 21750b57cec5SDimitry Andric /// both parameters. 217681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, 217781ad6265SDimitry Andric __m128i __b) { 217881ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b); 21790b57cec5SDimitry Andric } 21800b57cec5SDimitry Andric 21810b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 21820b57cec5SDimitry Andric /// unsigned [16 x i8] vectors, saving each sum in the corresponding element 21830fca6ea1SDimitry Andric /// of a 128-bit result vector of [16 x i8]. 21840fca6ea1SDimitry Andric /// 21850fca6ea1SDimitry Andric /// Positive sums greater than 0xFF are saturated to 0xFF. Negative sums are 21860fca6ea1SDimitry Andric /// saturated to 0x00. 21870b57cec5SDimitry Andric /// 21880b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 21890b57cec5SDimitry Andric /// 21900b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 21910b57cec5SDimitry Andric /// 21920b57cec5SDimitry Andric /// \param __a 21930b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21940b57cec5SDimitry Andric /// \param __b 21950b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 21960b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums 21970b57cec5SDimitry Andric /// of both parameters. 
219881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, 219981ad6265SDimitry Andric __m128i __b) { 220081ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b); 22010b57cec5SDimitry Andric } 22020b57cec5SDimitry Andric 22030b57cec5SDimitry Andric /// Adds, with saturation, the corresponding elements of two 128-bit 22040b57cec5SDimitry Andric /// unsigned [8 x i16] vectors, saving each sum in the corresponding element 22050fca6ea1SDimitry Andric /// of a 128-bit result vector of [8 x i16]. 22060fca6ea1SDimitry Andric /// 22070fca6ea1SDimitry Andric /// Positive sums greater than 0xFFFF are saturated to 0xFFFF. Negative sums 22080fca6ea1SDimitry Andric /// are saturated to 0x0000. 22090b57cec5SDimitry Andric /// 22100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22110b57cec5SDimitry Andric /// 22120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction. 22130b57cec5SDimitry Andric /// 22140b57cec5SDimitry Andric /// \param __a 22150b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22160b57cec5SDimitry Andric /// \param __b 22170b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22180b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums 22190b57cec5SDimitry Andric /// of both parameters. 
222081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, 222181ad6265SDimitry Andric __m128i __b) { 222281ad6265SDimitry Andric return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b); 22230b57cec5SDimitry Andric } 22240b57cec5SDimitry Andric 2225480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 22260b57cec5SDimitry Andric /// 128-bit unsigned [16 x i8] vectors, saving each result in the 22270b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 22280b57cec5SDimitry Andric /// 22290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22300b57cec5SDimitry Andric /// 22310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction. 22320b57cec5SDimitry Andric /// 22330b57cec5SDimitry Andric /// \param __a 22340b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22350b57cec5SDimitry Andric /// \param __b 22360b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 22370b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded 22380b57cec5SDimitry Andric /// averages of both parameters. 223981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, 224081ad6265SDimitry Andric __m128i __b) { 22410b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); 22420b57cec5SDimitry Andric } 22430b57cec5SDimitry Andric 2244480093f4SDimitry Andric /// Computes the rounded averages of corresponding elements of two 22450b57cec5SDimitry Andric /// 128-bit unsigned [8 x i16] vectors, saving each result in the 22460b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 
22470b57cec5SDimitry Andric /// 22480b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22490b57cec5SDimitry Andric /// 22500b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction. 22510b57cec5SDimitry Andric /// 22520b57cec5SDimitry Andric /// \param __a 22530b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22540b57cec5SDimitry Andric /// \param __b 22550b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 22560b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded 22570b57cec5SDimitry Andric /// averages of both parameters. 225881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, 225981ad6265SDimitry Andric __m128i __b) { 22600b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); 22610b57cec5SDimitry Andric } 22620b57cec5SDimitry Andric 22630b57cec5SDimitry Andric /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] 22640b57cec5SDimitry Andric /// vectors, producing eight intermediate 32-bit signed integer products, and 22650b57cec5SDimitry Andric /// adds the consecutive pairs of 32-bit products to form a 128-bit signed 22660b57cec5SDimitry Andric /// [4 x i32] vector. 22670b57cec5SDimitry Andric /// 22680b57cec5SDimitry Andric /// For example, bits [15:0] of both parameters are multiplied producing a 22690b57cec5SDimitry Andric /// 32-bit product, bits [31:16] of both parameters are multiplied producing 22700b57cec5SDimitry Andric /// a 32-bit product, and the sum of those two products becomes bits [31:0] 22710b57cec5SDimitry Andric /// of the result. 22720b57cec5SDimitry Andric /// 22730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22740b57cec5SDimitry Andric /// 22750b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction. 
22760b57cec5SDimitry Andric /// 22770b57cec5SDimitry Andric /// \param __a 22780b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22790b57cec5SDimitry Andric /// \param __b 22800b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22810b57cec5SDimitry Andric /// \returns A 128-bit signed [4 x i32] vector containing the sums of products 22820b57cec5SDimitry Andric /// of both parameters. 228381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, 228481ad6265SDimitry Andric __m128i __b) { 22850b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); 22860b57cec5SDimitry Andric } 22870b57cec5SDimitry Andric 22880b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 22890b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 22900b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 22910b57cec5SDimitry Andric /// 22920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 22930b57cec5SDimitry Andric /// 22940b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction. 22950b57cec5SDimitry Andric /// 22960b57cec5SDimitry Andric /// \param __a 22970b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 22980b57cec5SDimitry Andric /// \param __b 22990b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23000b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the greater value of 23010b57cec5SDimitry Andric /// each comparison. 
230281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, 230381ad6265SDimitry Andric __m128i __b) { 230404eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b); 23050b57cec5SDimitry Andric } 23060b57cec5SDimitry Andric 23070b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 23080b57cec5SDimitry Andric /// vectors, saving the greater value from each comparison in the 23090b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 23100b57cec5SDimitry Andric /// 23110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23120b57cec5SDimitry Andric /// 23130b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction. 23140b57cec5SDimitry Andric /// 23150b57cec5SDimitry Andric /// \param __a 23160b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23170b57cec5SDimitry Andric /// \param __b 23180b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23190b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of 23200b57cec5SDimitry Andric /// each comparison. 232181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, 232281ad6265SDimitry Andric __m128i __b) { 232304eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b); 23240b57cec5SDimitry Andric } 23250b57cec5SDimitry Andric 23260b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit signed [8 x i16] 23270b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 23280b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [8 x i16]. 
23290b57cec5SDimitry Andric /// 23300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23310b57cec5SDimitry Andric /// 23320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction. 23330b57cec5SDimitry Andric /// 23340b57cec5SDimitry Andric /// \param __a 23350b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23360b57cec5SDimitry Andric /// \param __b 23370b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23380b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of 23390b57cec5SDimitry Andric /// each comparison. 234081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, 234181ad6265SDimitry Andric __m128i __b) { 234204eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b); 23430b57cec5SDimitry Andric } 23440b57cec5SDimitry Andric 23450b57cec5SDimitry Andric /// Compares corresponding elements of two 128-bit unsigned [16 x i8] 23460b57cec5SDimitry Andric /// vectors, saving the smaller value from each comparison in the 23470b57cec5SDimitry Andric /// corresponding element of a 128-bit result vector of [16 x i8]. 23480b57cec5SDimitry Andric /// 23490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23500b57cec5SDimitry Andric /// 23510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction. 23520b57cec5SDimitry Andric /// 23530b57cec5SDimitry Andric /// \param __a 23540b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23550b57cec5SDimitry Andric /// \param __b 23560b57cec5SDimitry Andric /// A 128-bit unsigned [16 x i8] vector. 23570b57cec5SDimitry Andric /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of 23580b57cec5SDimitry Andric /// each comparison. 
235981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, 236081ad6265SDimitry Andric __m128i __b) { 236104eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b); 23620b57cec5SDimitry Andric } 23630b57cec5SDimitry Andric 23640b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 23650b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 23660b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 23670b57cec5SDimitry Andric /// 23680b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23690b57cec5SDimitry Andric /// 23700b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction. 23710b57cec5SDimitry Andric /// 23720b57cec5SDimitry Andric /// \param __a 23730b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23740b57cec5SDimitry Andric /// \param __b 23750b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 23760b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of 23770b57cec5SDimitry Andric /// each of the eight 32-bit products. 237881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, 237981ad6265SDimitry Andric __m128i __b) { 23800b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); 23810b57cec5SDimitry Andric } 23820b57cec5SDimitry Andric 23830b57cec5SDimitry Andric /// Multiplies the corresponding elements of two unsigned [8 x i16] 23840b57cec5SDimitry Andric /// vectors, saving the upper 16 bits of each 32-bit product in the 23850b57cec5SDimitry Andric /// corresponding element of a 128-bit unsigned [8 x i16] result vector. 
23860b57cec5SDimitry Andric /// 23870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 23880b57cec5SDimitry Andric /// 23890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction. 23900b57cec5SDimitry Andric /// 23910b57cec5SDimitry Andric /// \param __a 23920b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23930b57cec5SDimitry Andric /// \param __b 23940b57cec5SDimitry Andric /// A 128-bit unsigned [8 x i16] vector. 23950b57cec5SDimitry Andric /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits 23960b57cec5SDimitry Andric /// of each of the eight 32-bit products. 239781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, 239881ad6265SDimitry Andric __m128i __b) { 23990b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); 24000b57cec5SDimitry Andric } 24010b57cec5SDimitry Andric 24020b57cec5SDimitry Andric /// Multiplies the corresponding elements of two signed [8 x i16] 24030b57cec5SDimitry Andric /// vectors, saving the lower 16 bits of each 32-bit product in the 24040b57cec5SDimitry Andric /// corresponding element of a 128-bit signed [8 x i16] result vector. 24050b57cec5SDimitry Andric /// 24060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24070b57cec5SDimitry Andric /// 24080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction. 24090b57cec5SDimitry Andric /// 24100b57cec5SDimitry Andric /// \param __a 24110b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24120b57cec5SDimitry Andric /// \param __b 24130b57cec5SDimitry Andric /// A 128-bit signed [8 x i16] vector. 24140b57cec5SDimitry Andric /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of 24150b57cec5SDimitry Andric /// each of the eight 32-bit products. 
241681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, 241781ad6265SDimitry Andric __m128i __b) { 24180b57cec5SDimitry Andric return (__m128i)((__v8hu)__a * (__v8hu)__b); 24190b57cec5SDimitry Andric } 24200b57cec5SDimitry Andric 24210b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower bits 24220b57cec5SDimitry Andric /// of the two 64-bit integer vectors and returns the 64-bit unsigned 24230b57cec5SDimitry Andric /// product. 24240b57cec5SDimitry Andric /// 24250b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24260b57cec5SDimitry Andric /// 24270b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PMULUDQ </c> instruction. 24280b57cec5SDimitry Andric /// 24290b57cec5SDimitry Andric /// \param __a 24300b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 24310b57cec5SDimitry Andric /// \param __b 24320b57cec5SDimitry Andric /// A 64-bit integer containing one of the source operands. 24330b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the product of both operands. 243481ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, 243581ad6265SDimitry Andric __m64 __b) { 24360b57cec5SDimitry Andric return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); 24370b57cec5SDimitry Andric } 24380b57cec5SDimitry Andric 24390b57cec5SDimitry Andric /// Multiplies 32-bit unsigned integer values contained in the lower 24400b57cec5SDimitry Andric /// bits of the corresponding elements of two [2 x i64] vectors, and returns 24410b57cec5SDimitry Andric /// the 64-bit products in the corresponding elements of a [2 x i64] vector. 24420b57cec5SDimitry Andric /// 24430b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24440b57cec5SDimitry Andric /// 24450b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction. 
24460b57cec5SDimitry Andric /// 24470b57cec5SDimitry Andric /// \param __a 24480b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 24490b57cec5SDimitry Andric /// \param __b 24500b57cec5SDimitry Andric /// A [2 x i64] vector containing one of the source operands. 24510b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the product of both operands. 245281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, 245381ad6265SDimitry Andric __m128i __b) { 24540b57cec5SDimitry Andric return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); 24550b57cec5SDimitry Andric } 24560b57cec5SDimitry Andric 24570b57cec5SDimitry Andric /// Computes the absolute differences of corresponding 8-bit integer 24580b57cec5SDimitry Andric /// values in two 128-bit vectors. Sums the first 8 absolute differences, and 24590b57cec5SDimitry Andric /// separately sums the second 8 absolute differences. Packs these two 24600b57cec5SDimitry Andric /// unsigned 16-bit integer sums into the upper and lower elements of a 24610b57cec5SDimitry Andric /// [2 x i64] vector. 24620b57cec5SDimitry Andric /// 24630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24640b57cec5SDimitry Andric /// 24650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction. 24660b57cec5SDimitry Andric /// 24670b57cec5SDimitry Andric /// \param __a 24680b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 24690b57cec5SDimitry Andric /// \param __b 24700b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 24710b57cec5SDimitry Andric /// \returns A [2 x i64] vector containing the sums of the sets of absolute 24720b57cec5SDimitry Andric /// differences between both operands. 
247381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, 247481ad6265SDimitry Andric __m128i __b) { 24750b57cec5SDimitry Andric return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); 24760b57cec5SDimitry Andric } 24770b57cec5SDimitry Andric 24780b57cec5SDimitry Andric /// Subtracts the corresponding 8-bit integer values in the operands. 24790b57cec5SDimitry Andric /// 24800b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24810b57cec5SDimitry Andric /// 24820b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction. 24830b57cec5SDimitry Andric /// 24840b57cec5SDimitry Andric /// \param __a 24850b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 24860b57cec5SDimitry Andric /// \param __b 24870b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 24880b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 24890b57cec5SDimitry Andric /// in the operands. 249081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, 249181ad6265SDimitry Andric __m128i __b) { 24920b57cec5SDimitry Andric return (__m128i)((__v16qu)__a - (__v16qu)__b); 24930b57cec5SDimitry Andric } 24940b57cec5SDimitry Andric 24950b57cec5SDimitry Andric /// Subtracts the corresponding 16-bit integer values in the operands. 24960b57cec5SDimitry Andric /// 24970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 24980b57cec5SDimitry Andric /// 24990b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction. 25000b57cec5SDimitry Andric /// 25010b57cec5SDimitry Andric /// \param __a 25020b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25030b57cec5SDimitry Andric /// \param __b 25040b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 
25050b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25060b57cec5SDimitry Andric /// in the operands. 250781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, 250881ad6265SDimitry Andric __m128i __b) { 25090b57cec5SDimitry Andric return (__m128i)((__v8hu)__a - (__v8hu)__b); 25100b57cec5SDimitry Andric } 25110b57cec5SDimitry Andric 25120b57cec5SDimitry Andric /// Subtracts the corresponding 32-bit integer values in the operands. 25130b57cec5SDimitry Andric /// 25140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25150b57cec5SDimitry Andric /// 25160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction. 25170b57cec5SDimitry Andric /// 25180b57cec5SDimitry Andric /// \param __a 25190b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25200b57cec5SDimitry Andric /// \param __b 25210b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25220b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25230b57cec5SDimitry Andric /// in the operands. 252481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, 252581ad6265SDimitry Andric __m128i __b) { 25260b57cec5SDimitry Andric return (__m128i)((__v4su)__a - (__v4su)__b); 25270b57cec5SDimitry Andric } 25280b57cec5SDimitry Andric 25290b57cec5SDimitry Andric /// Subtracts signed or unsigned 64-bit integer values and writes the 25300b57cec5SDimitry Andric /// difference to the corresponding bits in the destination. 25310b57cec5SDimitry Andric /// 25320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25330b57cec5SDimitry Andric /// 25340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> PSUBQ </c> instruction. 
25350b57cec5SDimitry Andric /// 25360b57cec5SDimitry Andric /// \param __a 25370b57cec5SDimitry Andric /// A 64-bit integer vector containing the minuend. 25380b57cec5SDimitry Andric /// \param __b 25390b57cec5SDimitry Andric /// A 64-bit integer vector containing the subtrahend. 25400b57cec5SDimitry Andric /// \returns A 64-bit integer vector containing the difference of the values in 25410b57cec5SDimitry Andric /// the operands. 254281ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, 254381ad6265SDimitry Andric __m64 __b) { 25440b57cec5SDimitry Andric return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); 25450b57cec5SDimitry Andric } 25460b57cec5SDimitry Andric 25470b57cec5SDimitry Andric /// Subtracts the corresponding elements of two [2 x i64] vectors. 25480b57cec5SDimitry Andric /// 25490b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25500b57cec5SDimitry Andric /// 25510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction. 25520b57cec5SDimitry Andric /// 25530b57cec5SDimitry Andric /// \param __a 25540b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25550b57cec5SDimitry Andric /// \param __b 25560b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25570b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25580b57cec5SDimitry Andric /// in the operands. 
255981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, 256081ad6265SDimitry Andric __m128i __b) { 25610b57cec5SDimitry Andric return (__m128i)((__v2du)__a - (__v2du)__b); 25620b57cec5SDimitry Andric } 25630b57cec5SDimitry Andric 25640fca6ea1SDimitry Andric /// Subtracts, with saturation, corresponding 8-bit signed integer values in 25650fca6ea1SDimitry Andric /// the input and returns the differences in the corresponding bytes in the 25660fca6ea1SDimitry Andric /// destination. 25670fca6ea1SDimitry Andric /// 25680fca6ea1SDimitry Andric /// Differences greater than 0x7F are saturated to 0x7F, and differences 25690fca6ea1SDimitry Andric /// less than 0x80 are saturated to 0x80. 25700b57cec5SDimitry Andric /// 25710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25720b57cec5SDimitry Andric /// 25730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction. 25740b57cec5SDimitry Andric /// 25750b57cec5SDimitry Andric /// \param __a 25760b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25770b57cec5SDimitry Andric /// \param __b 25780b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 25790b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 25800b57cec5SDimitry Andric /// in the operands. 258181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, 258281ad6265SDimitry Andric __m128i __b) { 258381ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b); 25840b57cec5SDimitry Andric } 25850b57cec5SDimitry Andric 25860fca6ea1SDimitry Andric /// Subtracts, with saturation, corresponding 16-bit signed integer values in 25870fca6ea1SDimitry Andric /// the input and returns the differences in the corresponding bytes in the 25880fca6ea1SDimitry Andric /// destination. 
25890fca6ea1SDimitry Andric /// 25900b57cec5SDimitry Andric /// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less 25910b57cec5SDimitry Andric /// than 0x8000 are saturated to 0x8000. 25920b57cec5SDimitry Andric /// 25930b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 25940b57cec5SDimitry Andric /// 25950b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction. 25960b57cec5SDimitry Andric /// 25970b57cec5SDimitry Andric /// \param __a 25980b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 25990b57cec5SDimitry Andric /// \param __b 26000b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26010b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the differences of the values 26020b57cec5SDimitry Andric /// in the operands. 260381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, 260481ad6265SDimitry Andric __m128i __b) { 260581ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b); 26060b57cec5SDimitry Andric } 26070b57cec5SDimitry Andric 26080fca6ea1SDimitry Andric /// Subtracts, with saturation, corresponding 8-bit unsigned integer values in 26090fca6ea1SDimitry Andric /// the input and returns the differences in the corresponding bytes in the 26100fca6ea1SDimitry Andric /// destination. 26110fca6ea1SDimitry Andric /// 26120fca6ea1SDimitry Andric /// Differences less than 0x00 are saturated to 0x00. 26130b57cec5SDimitry Andric /// 26140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26150b57cec5SDimitry Andric /// 26160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction. 26170b57cec5SDimitry Andric /// 26180b57cec5SDimitry Andric /// \param __a 26190b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 
26200b57cec5SDimitry Andric /// \param __b 26210b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26220b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 26230b57cec5SDimitry Andric /// differences of the values in the operands. 262481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, 262581ad6265SDimitry Andric __m128i __b) { 262681ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b); 26270b57cec5SDimitry Andric } 26280b57cec5SDimitry Andric 26290fca6ea1SDimitry Andric /// Subtracts, with saturation, corresponding 16-bit unsigned integer values in 26300fca6ea1SDimitry Andric /// the input and returns the differences in the corresponding bytes in the 26310fca6ea1SDimitry Andric /// destination. 26320fca6ea1SDimitry Andric /// 26330fca6ea1SDimitry Andric /// Differences less than 0x0000 are saturated to 0x0000. 26340b57cec5SDimitry Andric /// 26350b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26360b57cec5SDimitry Andric /// 26370b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction. 26380b57cec5SDimitry Andric /// 26390b57cec5SDimitry Andric /// \param __a 26400b57cec5SDimitry Andric /// A 128-bit integer vector containing the minuends. 26410b57cec5SDimitry Andric /// \param __b 26420b57cec5SDimitry Andric /// A 128-bit integer vector containing the subtrahends. 26430b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the unsigned integer 26440b57cec5SDimitry Andric /// differences of the values in the operands. 
264581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, 264681ad6265SDimitry Andric __m128i __b) { 264781ad6265SDimitry Andric return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b); 26480b57cec5SDimitry Andric } 26490b57cec5SDimitry Andric 26500b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors. 26510b57cec5SDimitry Andric /// 26520b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26530b57cec5SDimitry Andric /// 26540b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction. 26550b57cec5SDimitry Andric /// 26560b57cec5SDimitry Andric /// \param __a 26570b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26580b57cec5SDimitry Andric /// \param __b 26590b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26600b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the values 26610b57cec5SDimitry Andric /// in both operands. 266281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, 266381ad6265SDimitry Andric __m128i __b) { 26640b57cec5SDimitry Andric return (__m128i)((__v2du)__a & (__v2du)__b); 26650b57cec5SDimitry Andric } 26660b57cec5SDimitry Andric 26670b57cec5SDimitry Andric /// Performs a bitwise AND of two 128-bit integer vectors, using the 26680b57cec5SDimitry Andric /// one's complement of the values contained in the first source operand. 26690b57cec5SDimitry Andric /// 26700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26710b57cec5SDimitry Andric /// 26720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction. 26730b57cec5SDimitry Andric /// 26740b57cec5SDimitry Andric /// \param __a 26750b57cec5SDimitry Andric /// A 128-bit vector containing the left source operand. 
The one's complement 26760b57cec5SDimitry Andric /// of this value is used in the bitwise AND. 26770b57cec5SDimitry Andric /// \param __b 26780b57cec5SDimitry Andric /// A 128-bit vector containing the right source operand. 26790b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise AND of the one's 26800b57cec5SDimitry Andric /// complement of the first operand and the values in the second operand. 268181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, 268281ad6265SDimitry Andric __m128i __b) { 26830b57cec5SDimitry Andric return (__m128i)(~(__v2du)__a & (__v2du)__b); 26840b57cec5SDimitry Andric } 26850b57cec5SDimitry Andric /// Performs a bitwise OR of two 128-bit integer vectors. 26860b57cec5SDimitry Andric /// 26870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 26880b57cec5SDimitry Andric /// 26890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPOR / POR </c> instruction. 26900b57cec5SDimitry Andric /// 26910b57cec5SDimitry Andric /// \param __a 26920b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26930b57cec5SDimitry Andric /// \param __b 26940b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 26950b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise OR of the values 26960b57cec5SDimitry Andric /// in both operands. 269781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, 269881ad6265SDimitry Andric __m128i __b) { 26990b57cec5SDimitry Andric return (__m128i)((__v2du)__a | (__v2du)__b); 27000b57cec5SDimitry Andric } 27010b57cec5SDimitry Andric 27020b57cec5SDimitry Andric /// Performs a bitwise exclusive OR of two 128-bit integer vectors. 
27030b57cec5SDimitry Andric /// 27040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27050b57cec5SDimitry Andric /// 27060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction. 27070b57cec5SDimitry Andric /// 27080b57cec5SDimitry Andric /// \param __a 27090b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27100b57cec5SDimitry Andric /// \param __b 27110b57cec5SDimitry Andric /// A 128-bit integer vector containing one of the source operands. 27120b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the 27130b57cec5SDimitry Andric /// values in both operands. 271481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, 271581ad6265SDimitry Andric __m128i __b) { 27160b57cec5SDimitry Andric return (__m128i)((__v2du)__a ^ (__v2du)__b); 27170b57cec5SDimitry Andric } 27180b57cec5SDimitry Andric 27190b57cec5SDimitry Andric /// Left-shifts the 128-bit integer vector operand by the specified 27200b57cec5SDimitry Andric /// number of bytes. Low-order bits are cleared. 27210b57cec5SDimitry Andric /// 27220b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27230b57cec5SDimitry Andric /// 27240b57cec5SDimitry Andric /// \code 27250b57cec5SDimitry Andric /// __m128i _mm_slli_si128(__m128i a, const int imm); 27260b57cec5SDimitry Andric /// \endcode 27270b57cec5SDimitry Andric /// 27280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction. 27290b57cec5SDimitry Andric /// 27300b57cec5SDimitry Andric /// \param a 27310b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27320b57cec5SDimitry Andric /// \param imm 27330b57cec5SDimitry Andric /// An immediate value specifying the number of bytes to left-shift operand 27340b57cec5SDimitry Andric /// \a a. 
27350b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted value. 27360b57cec5SDimitry Andric #define _mm_slli_si128(a, imm) \ 273781ad6265SDimitry Andric ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ 273881ad6265SDimitry Andric (int)(imm))) 27390b57cec5SDimitry Andric 27400b57cec5SDimitry Andric #define _mm_bslli_si128(a, imm) \ 274181ad6265SDimitry Andric ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), \ 274281ad6265SDimitry Andric (int)(imm))) 27430b57cec5SDimitry Andric 27440b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand 27450b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27460b57cec5SDimitry Andric /// 27470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27480b57cec5SDimitry Andric /// 27490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. 27500b57cec5SDimitry Andric /// 27510b57cec5SDimitry Andric /// \param __a 27520b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27530b57cec5SDimitry Andric /// \param __count 27540b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 27550b57cec5SDimitry Andric /// in operand \a __a. 27560b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 275781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, 275881ad6265SDimitry Andric int __count) { 27590b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); 27600b57cec5SDimitry Andric } 27610b57cec5SDimitry Andric 27620b57cec5SDimitry Andric /// Left-shifts each 16-bit value in the 128-bit integer vector operand 27630b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 
27640b57cec5SDimitry Andric /// 27650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27660b57cec5SDimitry Andric /// 27670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction. 27680b57cec5SDimitry Andric /// 27690b57cec5SDimitry Andric /// \param __a 27700b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27710b57cec5SDimitry Andric /// \param __count 27720b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 27730b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 27740b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 277581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, 277681ad6265SDimitry Andric __m128i __count) { 27770b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); 27780b57cec5SDimitry Andric } 27790b57cec5SDimitry Andric 27800b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 27810b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 27820b57cec5SDimitry Andric /// 27830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 27840b57cec5SDimitry Andric /// 27850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 27860b57cec5SDimitry Andric /// 27870b57cec5SDimitry Andric /// \param __a 27880b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 27890b57cec5SDimitry Andric /// \param __count 27900b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 27910b57cec5SDimitry Andric /// in operand \a __a. 27920b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
279381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, 279481ad6265SDimitry Andric int __count) { 27950b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); 27960b57cec5SDimitry Andric } 27970b57cec5SDimitry Andric 27980b57cec5SDimitry Andric /// Left-shifts each 32-bit value in the 128-bit integer vector operand 27990b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 28000b57cec5SDimitry Andric /// 28010b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28020b57cec5SDimitry Andric /// 28030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction. 28040b57cec5SDimitry Andric /// 28050b57cec5SDimitry Andric /// \param __a 28060b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28070b57cec5SDimitry Andric /// \param __count 28080b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28090b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 28100b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 281181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, 281281ad6265SDimitry Andric __m128i __count) { 28130b57cec5SDimitry Andric return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); 28140b57cec5SDimitry Andric } 28150b57cec5SDimitry Andric 28160b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 28170b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 28180b57cec5SDimitry Andric /// 28190b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28200b57cec5SDimitry Andric /// 28210b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 
28220b57cec5SDimitry Andric /// 28230b57cec5SDimitry Andric /// \param __a 28240b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28250b57cec5SDimitry Andric /// \param __count 28260b57cec5SDimitry Andric /// An integer value specifying the number of bits to left-shift each value 28270b57cec5SDimitry Andric /// in operand \a __a. 28280b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 282981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, 283081ad6265SDimitry Andric int __count) { 28310b57cec5SDimitry Andric return __builtin_ia32_psllqi128((__v2di)__a, __count); 28320b57cec5SDimitry Andric } 28330b57cec5SDimitry Andric 28340b57cec5SDimitry Andric /// Left-shifts each 64-bit value in the 128-bit integer vector operand 28350b57cec5SDimitry Andric /// by the specified number of bits. Low-order bits are cleared. 28360b57cec5SDimitry Andric /// 28370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28380b57cec5SDimitry Andric /// 28390b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction. 28400b57cec5SDimitry Andric /// 28410b57cec5SDimitry Andric /// \param __a 28420b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28430b57cec5SDimitry Andric /// \param __count 28440b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28450b57cec5SDimitry Andric /// to left-shift each value in operand \a __a. 28460b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the left-shifted values. 
284781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, 284881ad6265SDimitry Andric __m128i __count) { 28490b57cec5SDimitry Andric return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count); 28500b57cec5SDimitry Andric } 28510b57cec5SDimitry Andric 28520b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 28530b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28540b57cec5SDimitry Andric /// bit of the initial value. 28550b57cec5SDimitry Andric /// 28560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28570b57cec5SDimitry Andric /// 28580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 28590b57cec5SDimitry Andric /// 28600b57cec5SDimitry Andric /// \param __a 28610b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28620b57cec5SDimitry Andric /// \param __count 28630b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 28640b57cec5SDimitry Andric /// in operand \a __a. 28650b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 286681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, 286781ad6265SDimitry Andric int __count) { 28680b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); 28690b57cec5SDimitry Andric } 28700b57cec5SDimitry Andric 28710b57cec5SDimitry Andric /// Right-shifts each 16-bit value in the 128-bit integer vector operand 28720b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28730b57cec5SDimitry Andric /// bit of the initial value. 
28740b57cec5SDimitry Andric /// 28750b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28760b57cec5SDimitry Andric /// 28770b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction. 28780b57cec5SDimitry Andric /// 28790b57cec5SDimitry Andric /// \param __a 28800b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 28810b57cec5SDimitry Andric /// \param __count 28820b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 28830b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 28840b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 288581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, 288681ad6265SDimitry Andric __m128i __count) { 28870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); 28880b57cec5SDimitry Andric } 28890b57cec5SDimitry Andric 28900b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 28910b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 28920b57cec5SDimitry Andric /// bit of the initial value. 28930b57cec5SDimitry Andric /// 28940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 28950b57cec5SDimitry Andric /// 28960b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 28970b57cec5SDimitry Andric /// 28980b57cec5SDimitry Andric /// \param __a 28990b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29000b57cec5SDimitry Andric /// \param __count 29010b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29020b57cec5SDimitry Andric /// in operand \a __a. 29030b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
290481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, 290581ad6265SDimitry Andric int __count) { 29060b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); 29070b57cec5SDimitry Andric } 29080b57cec5SDimitry Andric 29090b57cec5SDimitry Andric /// Right-shifts each 32-bit value in the 128-bit integer vector operand 29100b57cec5SDimitry Andric /// by the specified number of bits. High-order bits are filled with the sign 29110b57cec5SDimitry Andric /// bit of the initial value. 29120b57cec5SDimitry Andric /// 29130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29140b57cec5SDimitry Andric /// 29150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction. 29160b57cec5SDimitry Andric /// 29170b57cec5SDimitry Andric /// \param __a 29180b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29190b57cec5SDimitry Andric /// \param __count 29200b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29210b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 29220b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 292381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, 292481ad6265SDimitry Andric __m128i __count) { 29250b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); 29260b57cec5SDimitry Andric } 29270b57cec5SDimitry Andric 29280b57cec5SDimitry Andric /// Right-shifts the 128-bit integer vector operand by the specified 29290b57cec5SDimitry Andric /// number of bytes. High-order bits are cleared. 
29300b57cec5SDimitry Andric /// 29310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29320b57cec5SDimitry Andric /// 29330b57cec5SDimitry Andric /// \code 29340b57cec5SDimitry Andric /// __m128i _mm_srli_si128(__m128i a, const int imm); 29350b57cec5SDimitry Andric /// \endcode 29360b57cec5SDimitry Andric /// 29370b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction. 29380b57cec5SDimitry Andric /// 29390b57cec5SDimitry Andric /// \param a 29400b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29410b57cec5SDimitry Andric /// \param imm 29420b57cec5SDimitry Andric /// An immediate value specifying the number of bytes to right-shift operand 29430b57cec5SDimitry Andric /// \a a. 29440b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted value. 29450b57cec5SDimitry Andric #define _mm_srli_si128(a, imm) \ 294681ad6265SDimitry Andric ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ 294781ad6265SDimitry Andric (int)(imm))) 29480b57cec5SDimitry Andric 29490b57cec5SDimitry Andric #define _mm_bsrli_si128(a, imm) \ 295081ad6265SDimitry Andric ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), \ 295181ad6265SDimitry Andric (int)(imm))) 29520b57cec5SDimitry Andric 29530b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 29540b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29550b57cec5SDimitry Andric /// 29560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29570b57cec5SDimitry Andric /// 29580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 29590b57cec5SDimitry Andric /// 29600b57cec5SDimitry Andric /// \param __a 29610b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 
29620b57cec5SDimitry Andric /// \param __count 29630b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 29640b57cec5SDimitry Andric /// in operand \a __a. 29650b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 296681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, 296781ad6265SDimitry Andric int __count) { 29680b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); 29690b57cec5SDimitry Andric } 29700b57cec5SDimitry Andric 29710b57cec5SDimitry Andric /// Right-shifts each of 16-bit values in the 128-bit integer vector 29720b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 29730b57cec5SDimitry Andric /// 29740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29750b57cec5SDimitry Andric /// 29760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction. 29770b57cec5SDimitry Andric /// 29780b57cec5SDimitry Andric /// \param __a 29790b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29800b57cec5SDimitry Andric /// \param __count 29810b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 29820b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 29830b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 298481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, 298581ad6265SDimitry Andric __m128i __count) { 29860b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); 29870b57cec5SDimitry Andric } 29880b57cec5SDimitry Andric 29890b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 29900b57cec5SDimitry Andric /// operand by the specified number of bits. 
High-order bits are cleared. 29910b57cec5SDimitry Andric /// 29920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 29930b57cec5SDimitry Andric /// 29940b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 29950b57cec5SDimitry Andric /// 29960b57cec5SDimitry Andric /// \param __a 29970b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 29980b57cec5SDimitry Andric /// \param __count 29990b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 30000b57cec5SDimitry Andric /// in operand \a __a. 30010b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 300281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, 300381ad6265SDimitry Andric int __count) { 30040b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); 30050b57cec5SDimitry Andric } 30060b57cec5SDimitry Andric 30070b57cec5SDimitry Andric /// Right-shifts each of 32-bit values in the 128-bit integer vector 30080b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 30090b57cec5SDimitry Andric /// 30100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30110b57cec5SDimitry Andric /// 30120b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction. 30130b57cec5SDimitry Andric /// 30140b57cec5SDimitry Andric /// \param __a 30150b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30160b57cec5SDimitry Andric /// \param __count 30170b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 30180b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 30190b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 
302081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, 302181ad6265SDimitry Andric __m128i __count) { 30220b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); 30230b57cec5SDimitry Andric } 30240b57cec5SDimitry Andric 30250b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 30260b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 30270b57cec5SDimitry Andric /// 30280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30290b57cec5SDimitry Andric /// 30300b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 30310b57cec5SDimitry Andric /// 30320b57cec5SDimitry Andric /// \param __a 30330b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30340b57cec5SDimitry Andric /// \param __count 30350b57cec5SDimitry Andric /// An integer value specifying the number of bits to right-shift each value 30360b57cec5SDimitry Andric /// in operand \a __a. 30370b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 303881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, 303981ad6265SDimitry Andric int __count) { 30400b57cec5SDimitry Andric return __builtin_ia32_psrlqi128((__v2di)__a, __count); 30410b57cec5SDimitry Andric } 30420b57cec5SDimitry Andric 30430b57cec5SDimitry Andric /// Right-shifts each of 64-bit values in the 128-bit integer vector 30440b57cec5SDimitry Andric /// operand by the specified number of bits. High-order bits are cleared. 30450b57cec5SDimitry Andric /// 30460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30470b57cec5SDimitry Andric /// 30480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction. 
30490b57cec5SDimitry Andric /// 30500b57cec5SDimitry Andric /// \param __a 30510b57cec5SDimitry Andric /// A 128-bit integer vector containing the source operand. 30520b57cec5SDimitry Andric /// \param __count 30530b57cec5SDimitry Andric /// A 128-bit integer vector in which bits [63:0] specify the number of bits 30540b57cec5SDimitry Andric /// to right-shift each value in operand \a __a. 30550b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the right-shifted values. 305681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, 305781ad6265SDimitry Andric __m128i __count) { 30580b57cec5SDimitry Andric return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count); 30590b57cec5SDimitry Andric } 30600b57cec5SDimitry Andric 30610b57cec5SDimitry Andric /// Compares each of the corresponding 8-bit values of the 128-bit 30620fca6ea1SDimitry Andric /// integer vectors for equality. 30630fca6ea1SDimitry Andric /// 30640fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFF for true. 30650b57cec5SDimitry Andric /// 30660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30670b57cec5SDimitry Andric /// 30680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction. 30690b57cec5SDimitry Andric /// 30700b57cec5SDimitry Andric /// \param __a 30710b57cec5SDimitry Andric /// A 128-bit integer vector. 30720b57cec5SDimitry Andric /// \param __b 30730b57cec5SDimitry Andric /// A 128-bit integer vector. 30740b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
307581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, 307681ad6265SDimitry Andric __m128i __b) { 30770b57cec5SDimitry Andric return (__m128i)((__v16qi)__a == (__v16qi)__b); 30780b57cec5SDimitry Andric } 30790b57cec5SDimitry Andric 30800b57cec5SDimitry Andric /// Compares each of the corresponding 16-bit values of the 128-bit 30810fca6ea1SDimitry Andric /// integer vectors for equality. 30820fca6ea1SDimitry Andric /// 30830fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFF for true. 30840b57cec5SDimitry Andric /// 30850b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 30860b57cec5SDimitry Andric /// 30870b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction. 30880b57cec5SDimitry Andric /// 30890b57cec5SDimitry Andric /// \param __a 30900b57cec5SDimitry Andric /// A 128-bit integer vector. 30910b57cec5SDimitry Andric /// \param __b 30920b57cec5SDimitry Andric /// A 128-bit integer vector. 30930b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 309481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, 309581ad6265SDimitry Andric __m128i __b) { 30960b57cec5SDimitry Andric return (__m128i)((__v8hi)__a == (__v8hi)__b); 30970b57cec5SDimitry Andric } 30980b57cec5SDimitry Andric 30990b57cec5SDimitry Andric /// Compares each of the corresponding 32-bit values of the 128-bit 31000fca6ea1SDimitry Andric /// integer vectors for equality. 31010fca6ea1SDimitry Andric /// 31020fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFF for true. 31030b57cec5SDimitry Andric /// 31040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31050b57cec5SDimitry Andric /// 31060b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction. 
31070b57cec5SDimitry Andric /// 31080b57cec5SDimitry Andric /// \param __a 31090b57cec5SDimitry Andric /// A 128-bit integer vector. 31100b57cec5SDimitry Andric /// \param __b 31110b57cec5SDimitry Andric /// A 128-bit integer vector. 31120b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 311381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, 311481ad6265SDimitry Andric __m128i __b) { 31150b57cec5SDimitry Andric return (__m128i)((__v4si)__a == (__v4si)__b); 31160b57cec5SDimitry Andric } 31170b57cec5SDimitry Andric 31180b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 31190b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are 31200fca6ea1SDimitry Andric /// greater than those in the second operand. 31210fca6ea1SDimitry Andric /// 31220fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFF for true. 31230b57cec5SDimitry Andric /// 31240b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31250b57cec5SDimitry Andric /// 31260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 31270b57cec5SDimitry Andric /// 31280b57cec5SDimitry Andric /// \param __a 31290b57cec5SDimitry Andric /// A 128-bit integer vector. 31300b57cec5SDimitry Andric /// \param __b 31310b57cec5SDimitry Andric /// A 128-bit integer vector. 31320b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 313381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, 313481ad6265SDimitry Andric __m128i __b) { 31350b57cec5SDimitry Andric /* This function always performs a signed comparison, but __v16qi is a char 31360b57cec5SDimitry Andric which may be signed or unsigned, so use __v16qs. 
*/ 31370b57cec5SDimitry Andric return (__m128i)((__v16qs)__a > (__v16qs)__b); 31380b57cec5SDimitry Andric } 31390b57cec5SDimitry Andric 31400b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 31410b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 31420b57cec5SDimitry Andric /// are greater than those in the second operand. 31430b57cec5SDimitry Andric /// 31440fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFF for true. 31450b57cec5SDimitry Andric /// 31460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31470b57cec5SDimitry Andric /// 31480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 31490b57cec5SDimitry Andric /// 31500b57cec5SDimitry Andric /// \param __a 31510b57cec5SDimitry Andric /// A 128-bit integer vector. 31520b57cec5SDimitry Andric /// \param __b 31530b57cec5SDimitry Andric /// A 128-bit integer vector. 31540b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 315581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, 315681ad6265SDimitry Andric __m128i __b) { 31570b57cec5SDimitry Andric return (__m128i)((__v8hi)__a > (__v8hi)__b); 31580b57cec5SDimitry Andric } 31590b57cec5SDimitry Andric 31600b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 31610b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 31620b57cec5SDimitry Andric /// are greater than those in the second operand. 31630b57cec5SDimitry Andric /// 31640fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFF for true. 
31650b57cec5SDimitry Andric /// 31660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31670b57cec5SDimitry Andric /// 31680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 31690b57cec5SDimitry Andric /// 31700b57cec5SDimitry Andric /// \param __a 31710b57cec5SDimitry Andric /// A 128-bit integer vector. 31720b57cec5SDimitry Andric /// \param __b 31730b57cec5SDimitry Andric /// A 128-bit integer vector. 31740b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 317581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, 317681ad6265SDimitry Andric __m128i __b) { 31770b57cec5SDimitry Andric return (__m128i)((__v4si)__a > (__v4si)__b); 31780b57cec5SDimitry Andric } 31790b57cec5SDimitry Andric 31800b57cec5SDimitry Andric /// Compares each of the corresponding signed 8-bit values of the 128-bit 31810b57cec5SDimitry Andric /// integer vectors to determine if the values in the first operand are less 31820b57cec5SDimitry Andric /// than those in the second operand. 31830b57cec5SDimitry Andric /// 31840fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFF for true. 31850b57cec5SDimitry Andric /// 31860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 31870b57cec5SDimitry Andric /// 31880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction. 31890b57cec5SDimitry Andric /// 31900b57cec5SDimitry Andric /// \param __a 31910b57cec5SDimitry Andric /// A 128-bit integer vector. 31920b57cec5SDimitry Andric /// \param __b 31930b57cec5SDimitry Andric /// A 128-bit integer vector. 31940b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 
319581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, 319681ad6265SDimitry Andric __m128i __b) { 31970b57cec5SDimitry Andric return _mm_cmpgt_epi8(__b, __a); 31980b57cec5SDimitry Andric } 31990b57cec5SDimitry Andric 32000b57cec5SDimitry Andric /// Compares each of the corresponding signed 16-bit values of the 32010b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 32020b57cec5SDimitry Andric /// are less than those in the second operand. 32030b57cec5SDimitry Andric /// 32040fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFF for true. 32050b57cec5SDimitry Andric /// 32060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32070b57cec5SDimitry Andric /// 32080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction. 32090b57cec5SDimitry Andric /// 32100b57cec5SDimitry Andric /// \param __a 32110b57cec5SDimitry Andric /// A 128-bit integer vector. 32120b57cec5SDimitry Andric /// \param __b 32130b57cec5SDimitry Andric /// A 128-bit integer vector. 32140b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 321581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, 321681ad6265SDimitry Andric __m128i __b) { 32170b57cec5SDimitry Andric return _mm_cmpgt_epi16(__b, __a); 32180b57cec5SDimitry Andric } 32190b57cec5SDimitry Andric 32200b57cec5SDimitry Andric /// Compares each of the corresponding signed 32-bit values of the 32210b57cec5SDimitry Andric /// 128-bit integer vectors to determine if the values in the first operand 32220b57cec5SDimitry Andric /// are less than those in the second operand. 32230b57cec5SDimitry Andric /// 32240fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFF for true. 
32250b57cec5SDimitry Andric /// 32260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32270b57cec5SDimitry Andric /// 32280b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction. 32290b57cec5SDimitry Andric /// 32300b57cec5SDimitry Andric /// \param __a 32310b57cec5SDimitry Andric /// A 128-bit integer vector. 32320b57cec5SDimitry Andric /// \param __b 32330b57cec5SDimitry Andric /// A 128-bit integer vector. 32340b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the comparison results. 323581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, 323681ad6265SDimitry Andric __m128i __b) { 32370b57cec5SDimitry Andric return _mm_cmpgt_epi32(__b, __a); 32380b57cec5SDimitry Andric } 32390b57cec5SDimitry Andric 32400b57cec5SDimitry Andric #ifdef __x86_64__ 32410b57cec5SDimitry Andric /// Converts a 64-bit signed integer value from the second operand into a 32420b57cec5SDimitry Andric /// double-precision value and returns it in the lower element of a [2 x 32430b57cec5SDimitry Andric /// double] vector; the upper element of the returned vector is copied from 32440b57cec5SDimitry Andric /// the upper element of the first operand. 32450b57cec5SDimitry Andric /// 32460b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32470b57cec5SDimitry Andric /// 32480b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction. 32490b57cec5SDimitry Andric /// 32500b57cec5SDimitry Andric /// \param __a 32510b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are 32520b57cec5SDimitry Andric /// copied to the upper 64 bits of the destination. 32530b57cec5SDimitry Andric /// \param __b 32540b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 
32550b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the 32560b57cec5SDimitry Andric /// converted value of the second operand. The upper 64 bits are copied from 32570b57cec5SDimitry Andric /// the upper 64 bits of the first operand. 325881ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi64_sd(__m128d __a, 325981ad6265SDimitry Andric long long __b) { 32600b57cec5SDimitry Andric __a[0] = __b; 32610b57cec5SDimitry Andric return __a; 32620b57cec5SDimitry Andric } 32630b57cec5SDimitry Andric 32640b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 32650fca6ea1SDimitry Andric /// 64-bit signed integer value. 32660fca6ea1SDimitry Andric /// 32670fca6ea1SDimitry Andric /// If the converted value does not fit in a 64-bit integer, raises a 32680fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 32690fca6ea1SDimitry Andric /// the most negative integer. 32700b57cec5SDimitry Andric /// 32710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32720b57cec5SDimitry Andric /// 32730b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction. 32740b57cec5SDimitry Andric /// 32750b57cec5SDimitry Andric /// \param __a 32760b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 32770b57cec5SDimitry Andric /// conversion. 32780b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 
327981ad6265SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsd_si64(__m128d __a) { 32800b57cec5SDimitry Andric return __builtin_ia32_cvtsd2si64((__v2df)__a); 32810b57cec5SDimitry Andric } 32820b57cec5SDimitry Andric 32830b57cec5SDimitry Andric /// Converts the first (lower) element of a vector of [2 x double] into a 32840fca6ea1SDimitry Andric /// 64-bit signed truncated (rounded toward zero) integer value. 32850fca6ea1SDimitry Andric /// 32860fca6ea1SDimitry Andric /// If a converted value does not fit in a 64-bit integer, raises a 32870fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 32880fca6ea1SDimitry Andric /// the most negative integer. 32890b57cec5SDimitry Andric /// 32900b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 32910b57cec5SDimitry Andric /// 32920b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c> 32930b57cec5SDimitry Andric /// instruction. 32940b57cec5SDimitry Andric /// 32950b57cec5SDimitry Andric /// \param __a 32960b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. The lower 64 bits are used in the 32970b57cec5SDimitry Andric /// conversion. 32980b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the converted value. 329981ad6265SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { 33000b57cec5SDimitry Andric return __builtin_ia32_cvttsd2si64((__v2df)__a); 33010b57cec5SDimitry Andric } 33020b57cec5SDimitry Andric #endif 33030b57cec5SDimitry Andric 33040b57cec5SDimitry Andric /// Converts a vector of [4 x i32] into a vector of [4 x float]. 33050b57cec5SDimitry Andric /// 33060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33070b57cec5SDimitry Andric /// 33080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction. 
33090b57cec5SDimitry Andric /// 33100b57cec5SDimitry Andric /// \param __a 33110b57cec5SDimitry Andric /// A 128-bit integer vector. 33120b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the converted values. 331381ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a) { 33140b57cec5SDimitry Andric return (__m128) __builtin_convertvector((__v4si)__a, __v4sf); 33150b57cec5SDimitry Andric } 33160b57cec5SDimitry Andric 33170b57cec5SDimitry Andric /// Converts a vector of [4 x float] into a vector of [4 x i32]. 33180b57cec5SDimitry Andric /// 33190fca6ea1SDimitry Andric /// If a converted value does not fit in a 32-bit integer, raises a 33200fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 33210fca6ea1SDimitry Andric /// the most negative integer. 33220fca6ea1SDimitry Andric /// 33230b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33240b57cec5SDimitry Andric /// 33250b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction. 33260b57cec5SDimitry Andric /// 33270b57cec5SDimitry Andric /// \param __a 33280b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 33290b57cec5SDimitry Andric /// \returns A 128-bit integer vector of [4 x i32] containing the converted 33300b57cec5SDimitry Andric /// values. 333181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a) { 33320b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a); 33330b57cec5SDimitry Andric } 33340b57cec5SDimitry Andric 33350fca6ea1SDimitry Andric /// Converts a vector of [4 x float] into four signed truncated (rounded toward 33360fca6ea1SDimitry Andric /// zero) 32-bit integers, returned in a vector of [4 x i32]. 
33370fca6ea1SDimitry Andric /// 33380fca6ea1SDimitry Andric /// If a converted value does not fit in a 32-bit integer, raises a 33390fca6ea1SDimitry Andric /// floating-point invalid exception. If the exception is masked, returns 33400fca6ea1SDimitry Andric /// the most negative integer. 33410b57cec5SDimitry Andric /// 33420b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33430b57cec5SDimitry Andric /// 33440b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c> 33450b57cec5SDimitry Andric /// instruction. 33460b57cec5SDimitry Andric /// 33470b57cec5SDimitry Andric /// \param __a 33480b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 33490b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the converted values. 335081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) { 33510b57cec5SDimitry Andric return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); 33520b57cec5SDimitry Andric } 33530b57cec5SDimitry Andric 33540b57cec5SDimitry Andric /// Returns a vector of [4 x i32] where the lowest element is the input 33550b57cec5SDimitry Andric /// operand and the remaining elements are zero. 33560b57cec5SDimitry Andric /// 33570b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33580b57cec5SDimitry Andric /// 33590b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 33600b57cec5SDimitry Andric /// 33610b57cec5SDimitry Andric /// \param __a 33620b57cec5SDimitry Andric /// A 32-bit signed integer operand. 33630b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32]. 
336481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) { 33650b57cec5SDimitry Andric return __extension__(__m128i)(__v4si){__a, 0, 0, 0}; 33660b57cec5SDimitry Andric } 33670b57cec5SDimitry Andric 33680b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is the input 33690b57cec5SDimitry Andric /// operand and the upper element is zero. 33700b57cec5SDimitry Andric /// 33710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33720b57cec5SDimitry Andric /// 337381ad6265SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction 337481ad6265SDimitry Andric /// in 64-bit mode. 33750b57cec5SDimitry Andric /// 33760b57cec5SDimitry Andric /// \param __a 33770b57cec5SDimitry Andric /// A 64-bit signed integer operand containing the value to be converted. 33780b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the converted value. 337981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) { 33800b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){__a, 0}; 33810b57cec5SDimitry Andric } 33820b57cec5SDimitry Andric 33830b57cec5SDimitry Andric /// Moves the least significant 32 bits of a vector of [4 x i32] to a 33840b57cec5SDimitry Andric /// 32-bit signed integer value. 33850b57cec5SDimitry Andric /// 33860b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 33870b57cec5SDimitry Andric /// 33880b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 33890b57cec5SDimitry Andric /// 33900b57cec5SDimitry Andric /// \param __a 33910b57cec5SDimitry Andric /// A vector of [4 x i32]. The least significant 32 bits are moved to the 33920b57cec5SDimitry Andric /// destination. 33930b57cec5SDimitry Andric /// \returns A 32-bit signed integer containing the moved value. 
339481ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) { 33950b57cec5SDimitry Andric __v4si __b = (__v4si)__a; 33960b57cec5SDimitry Andric return __b[0]; 33970b57cec5SDimitry Andric } 33980b57cec5SDimitry Andric 33990b57cec5SDimitry Andric /// Moves the least significant 64 bits of a vector of [2 x i64] to a 34000b57cec5SDimitry Andric /// 64-bit signed integer value. 34010b57cec5SDimitry Andric /// 34020b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34030b57cec5SDimitry Andric /// 34040b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 34050b57cec5SDimitry Andric /// 34060b57cec5SDimitry Andric /// \param __a 34070b57cec5SDimitry Andric /// A vector of [2 x i64]. The least significant 64 bits are moved to the 34080b57cec5SDimitry Andric /// destination. 34090b57cec5SDimitry Andric /// \returns A 64-bit signed integer containing the moved value. 341081ad6265SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) { 34110b57cec5SDimitry Andric return __a[0]; 34120b57cec5SDimitry Andric } 34130b57cec5SDimitry Andric 34140b57cec5SDimitry Andric /// Moves packed integer values from an aligned 128-bit memory location 34150b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 34160b57cec5SDimitry Andric /// 34170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34180b57cec5SDimitry Andric /// 34190b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction. 34200b57cec5SDimitry Andric /// 34210b57cec5SDimitry Andric /// \param __p 34220b57cec5SDimitry Andric /// An aligned pointer to a memory location containing integer values. 34230b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 
34240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 342581ad6265SDimitry Andric _mm_load_si128(__m128i const *__p) { 34260b57cec5SDimitry Andric return *__p; 34270b57cec5SDimitry Andric } 34280b57cec5SDimitry Andric 34290b57cec5SDimitry Andric /// Moves packed integer values from an unaligned 128-bit memory location 34300b57cec5SDimitry Andric /// to elements in a 128-bit integer vector. 34310b57cec5SDimitry Andric /// 34320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34330b57cec5SDimitry Andric /// 34340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction. 34350b57cec5SDimitry Andric /// 34360b57cec5SDimitry Andric /// \param __p 34370b57cec5SDimitry Andric /// A pointer to a memory location containing integer values. 34380b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the moved values. 34390b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 344081ad6265SDimitry Andric _mm_loadu_si128(__m128i_u const *__p) { 34410b57cec5SDimitry Andric struct __loadu_si128 { 34420b57cec5SDimitry Andric __m128i_u __v; 34430b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 3444480093f4SDimitry Andric return ((const struct __loadu_si128 *)__p)->__v; 34450b57cec5SDimitry Andric } 34460b57cec5SDimitry Andric 34470b57cec5SDimitry Andric /// Returns a vector of [2 x i64] where the lower element is taken from 34480b57cec5SDimitry Andric /// the lower element of the operand, and the upper element is zero. 34490b57cec5SDimitry Andric /// 34500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34510b57cec5SDimitry Andric /// 34520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 34530b57cec5SDimitry Andric /// 34540b57cec5SDimitry Andric /// \param __p 34550b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of 34560b57cec5SDimitry Andric /// the destination. 
34570b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the 34580b57cec5SDimitry Andric /// moved value. The higher order bits are cleared. 34590b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 346081ad6265SDimitry Andric _mm_loadl_epi64(__m128i_u const *__p) { 34610b57cec5SDimitry Andric struct __mm_loadl_epi64_struct { 34620b57cec5SDimitry Andric long long __u; 34630b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 346481ad6265SDimitry Andric return __extension__(__m128i){ 346581ad6265SDimitry Andric ((const struct __mm_loadl_epi64_struct *)__p)->__u, 0}; 34660b57cec5SDimitry Andric } 34670b57cec5SDimitry Andric 34680b57cec5SDimitry Andric /// Generates a 128-bit vector of [4 x i32] with unspecified content. 34690b57cec5SDimitry Andric /// This could be used as an argument to another intrinsic function where the 34700b57cec5SDimitry Andric /// argument is required but the value is not actually used. 34710b57cec5SDimitry Andric /// 34720b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34730b57cec5SDimitry Andric /// 34740b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 34750b57cec5SDimitry Andric /// 34760b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] with unspecified content. 347781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) { 34780b57cec5SDimitry Andric return (__m128i)__builtin_ia32_undef128(); 34790b57cec5SDimitry Andric } 34800b57cec5SDimitry Andric 34810b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 34820b57cec5SDimitry Andric /// the specified 64-bit integer values. 
34830b57cec5SDimitry Andric /// 34840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 34850b57cec5SDimitry Andric /// 34860b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 34870b57cec5SDimitry Andric /// instruction. 34880b57cec5SDimitry Andric /// 34890b57cec5SDimitry Andric /// \param __q1 34900b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 34910b57cec5SDimitry Andric /// destination vector of [2 x i64]. 34920b57cec5SDimitry Andric /// \param __q0 34930b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 34940b57cec5SDimitry Andric /// destination vector of [2 x i64]. 34950b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 34960b57cec5SDimitry Andric /// provided in the operands. 349781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, 349881ad6265SDimitry Andric long long __q0) { 34990b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){__q0, __q1}; 35000b57cec5SDimitry Andric } 35010b57cec5SDimitry Andric 35020b57cec5SDimitry Andric /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with 35030b57cec5SDimitry Andric /// the specified 64-bit integer values. 35040b57cec5SDimitry Andric /// 35050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35060b57cec5SDimitry Andric /// 35070b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35080b57cec5SDimitry Andric /// instruction. 35090b57cec5SDimitry Andric /// 35100b57cec5SDimitry Andric /// \param __q1 35110b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the upper 64 bits of the 35120b57cec5SDimitry Andric /// destination vector of [2 x i64]. 
35130b57cec5SDimitry Andric /// \param __q0 35140b57cec5SDimitry Andric /// A 64-bit integer value used to initialize the lower 64 bits of the 35150b57cec5SDimitry Andric /// destination vector of [2 x i64]. 35160b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] containing the values 35170b57cec5SDimitry Andric /// provided in the operands. 351881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, 351981ad6265SDimitry Andric __m64 __q0) { 35200b57cec5SDimitry Andric return _mm_set_epi64x((long long)__q1, (long long)__q0); 35210b57cec5SDimitry Andric } 35220b57cec5SDimitry Andric 35230b57cec5SDimitry Andric /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with 35240b57cec5SDimitry Andric /// the specified 32-bit integer values. 35250b57cec5SDimitry Andric /// 35260b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35270b57cec5SDimitry Andric /// 35280b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35290b57cec5SDimitry Andric /// instruction. 35300b57cec5SDimitry Andric /// 35310b57cec5SDimitry Andric /// \param __i3 35320b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [127:96] of the 35330b57cec5SDimitry Andric /// destination vector. 35340b57cec5SDimitry Andric /// \param __i2 35350b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [95:64] of the destination 35360b57cec5SDimitry Andric /// vector. 35370b57cec5SDimitry Andric /// \param __i1 35380b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [63:32] of the destination 35390b57cec5SDimitry Andric /// vector. 35400b57cec5SDimitry Andric /// \param __i0 35410b57cec5SDimitry Andric /// A 32-bit integer value used to initialize bits [31:0] of the destination 35420b57cec5SDimitry Andric /// vector. 
35430b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] containing the values 35440b57cec5SDimitry Andric /// provided in the operands. 354581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, 354681ad6265SDimitry Andric int __i1, int __i0) { 35470b57cec5SDimitry Andric return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3}; 35480b57cec5SDimitry Andric } 35490b57cec5SDimitry Andric 35500b57cec5SDimitry Andric /// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with 35510b57cec5SDimitry Andric /// the specified 16-bit integer values. 35520b57cec5SDimitry Andric /// 35530b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35540b57cec5SDimitry Andric /// 35550b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35560b57cec5SDimitry Andric /// instruction. 35570b57cec5SDimitry Andric /// 35580b57cec5SDimitry Andric /// \param __w7 35590b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [127:112] of the 35600b57cec5SDimitry Andric /// destination vector. 35610b57cec5SDimitry Andric /// \param __w6 35620b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [111:96] of the 35630b57cec5SDimitry Andric /// destination vector. 35640b57cec5SDimitry Andric /// \param __w5 35650b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [95:80] of the destination 35660b57cec5SDimitry Andric /// vector. 35670b57cec5SDimitry Andric /// \param __w4 35680b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [79:64] of the destination 35690b57cec5SDimitry Andric /// vector. 35700b57cec5SDimitry Andric /// \param __w3 35710b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [63:48] of the destination 35720b57cec5SDimitry Andric /// vector. 
35730b57cec5SDimitry Andric /// \param __w2 35740b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [47:32] of the destination 35750b57cec5SDimitry Andric /// vector. 35760b57cec5SDimitry Andric /// \param __w1 35770b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [31:16] of the destination 35780b57cec5SDimitry Andric /// vector. 35790b57cec5SDimitry Andric /// \param __w0 35800b57cec5SDimitry Andric /// A 16-bit integer value used to initialize bits [15:0] of the destination 35810b57cec5SDimitry Andric /// vector. 35820b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] containing the values 35830b57cec5SDimitry Andric /// provided in the operands. 35840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 358581ad6265SDimitry Andric _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, 358681ad6265SDimitry Andric short __w2, short __w1, short __w0) { 358781ad6265SDimitry Andric return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3, 358881ad6265SDimitry Andric __w4, __w5, __w6, __w7}; 35890b57cec5SDimitry Andric } 35900b57cec5SDimitry Andric 35910b57cec5SDimitry Andric /// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with 35920b57cec5SDimitry Andric /// the specified 8-bit integer values. 35930b57cec5SDimitry Andric /// 35940b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 35950b57cec5SDimitry Andric /// 35960b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 35970b57cec5SDimitry Andric /// instruction. 35980b57cec5SDimitry Andric /// 35990b57cec5SDimitry Andric /// \param __b15 36000b57cec5SDimitry Andric /// Initializes bits [127:120] of the destination vector. 36010b57cec5SDimitry Andric /// \param __b14 36020b57cec5SDimitry Andric /// Initializes bits [119:112] of the destination vector. 
36030b57cec5SDimitry Andric /// \param __b13 36040b57cec5SDimitry Andric /// Initializes bits [111:104] of the destination vector. 36050b57cec5SDimitry Andric /// \param __b12 36060b57cec5SDimitry Andric /// Initializes bits [103:96] of the destination vector. 36070b57cec5SDimitry Andric /// \param __b11 36080b57cec5SDimitry Andric /// Initializes bits [95:88] of the destination vector. 36090b57cec5SDimitry Andric /// \param __b10 36100b57cec5SDimitry Andric /// Initializes bits [87:80] of the destination vector. 36110b57cec5SDimitry Andric /// \param __b9 36120b57cec5SDimitry Andric /// Initializes bits [79:72] of the destination vector. 36130b57cec5SDimitry Andric /// \param __b8 36140b57cec5SDimitry Andric /// Initializes bits [71:64] of the destination vector. 36150b57cec5SDimitry Andric /// \param __b7 36160b57cec5SDimitry Andric /// Initializes bits [63:56] of the destination vector. 36170b57cec5SDimitry Andric /// \param __b6 36180b57cec5SDimitry Andric /// Initializes bits [55:48] of the destination vector. 36190b57cec5SDimitry Andric /// \param __b5 36200b57cec5SDimitry Andric /// Initializes bits [47:40] of the destination vector. 36210b57cec5SDimitry Andric /// \param __b4 36220b57cec5SDimitry Andric /// Initializes bits [39:32] of the destination vector. 36230b57cec5SDimitry Andric /// \param __b3 36240b57cec5SDimitry Andric /// Initializes bits [31:24] of the destination vector. 36250b57cec5SDimitry Andric /// \param __b2 36260b57cec5SDimitry Andric /// Initializes bits [23:16] of the destination vector. 36270b57cec5SDimitry Andric /// \param __b1 36280b57cec5SDimitry Andric /// Initializes bits [15:8] of the destination vector. 36290b57cec5SDimitry Andric /// \param __b0 36300b57cec5SDimitry Andric /// Initializes bits [7:0] of the destination vector. 36310b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] containing the values 36320b57cec5SDimitry Andric /// provided in the operands. 
36330b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 363481ad6265SDimitry Andric _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, 363581ad6265SDimitry Andric char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, 363681ad6265SDimitry Andric char __b4, char __b3, char __b2, char __b1, char __b0) { 363781ad6265SDimitry Andric return __extension__(__m128i)(__v16qi){ 363881ad6265SDimitry Andric __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, 363981ad6265SDimitry Andric __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15}; 36400b57cec5SDimitry Andric } 36410b57cec5SDimitry Andric 36420b57cec5SDimitry Andric /// Initializes both values in a 128-bit integer vector with the 36430b57cec5SDimitry Andric /// specified 64-bit integer value. 36440b57cec5SDimitry Andric /// 36450b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36460b57cec5SDimitry Andric /// 36470b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36480b57cec5SDimitry Andric /// instruction. 36490b57cec5SDimitry Andric /// 36500b57cec5SDimitry Andric /// \param __q 36510b57cec5SDimitry Andric /// Integer value used to initialize the elements of the destination integer 36520b57cec5SDimitry Andric /// vector. 36530b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector of [2 x i64] with both 36540b57cec5SDimitry Andric /// elements containing the value provided in the operand. 365581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { 36560b57cec5SDimitry Andric return _mm_set_epi64x(__q, __q); 36570b57cec5SDimitry Andric } 36580b57cec5SDimitry Andric 36590b57cec5SDimitry Andric /// Initializes both values in a 128-bit vector of [2 x i64] with the 36600b57cec5SDimitry Andric /// specified 64-bit value. 
36610b57cec5SDimitry Andric /// 36620b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36630b57cec5SDimitry Andric /// 36640b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36650b57cec5SDimitry Andric /// instruction. 36660b57cec5SDimitry Andric /// 36670b57cec5SDimitry Andric /// \param __q 36680b57cec5SDimitry Andric /// A 64-bit value used to initialize the elements of the destination integer 36690b57cec5SDimitry Andric /// vector. 36700b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [2 x i64] with all elements 36710b57cec5SDimitry Andric /// containing the value provided in the operand. 367281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { 36730b57cec5SDimitry Andric return _mm_set_epi64(__q, __q); 36740b57cec5SDimitry Andric } 36750b57cec5SDimitry Andric 36760b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [4 x i32] with the 36770b57cec5SDimitry Andric /// specified 32-bit value. 36780b57cec5SDimitry Andric /// 36790b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36800b57cec5SDimitry Andric /// 36810b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36820b57cec5SDimitry Andric /// instruction. 36830b57cec5SDimitry Andric /// 36840b57cec5SDimitry Andric /// \param __i 36850b57cec5SDimitry Andric /// A 32-bit value used to initialize the elements of the destination integer 36860b57cec5SDimitry Andric /// vector. 36870b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [4 x i32] with all elements 36880b57cec5SDimitry Andric /// containing the value provided in the operand. 
368981ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { 36900b57cec5SDimitry Andric return _mm_set_epi32(__i, __i, __i, __i); 36910b57cec5SDimitry Andric } 36920b57cec5SDimitry Andric 36930b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [8 x i16] with the 36940b57cec5SDimitry Andric /// specified 16-bit value. 36950b57cec5SDimitry Andric /// 36960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 36970b57cec5SDimitry Andric /// 36980b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 36990b57cec5SDimitry Andric /// instruction. 37000b57cec5SDimitry Andric /// 37010b57cec5SDimitry Andric /// \param __w 37020b57cec5SDimitry Andric /// A 16-bit value used to initialize the elements of the destination integer 37030b57cec5SDimitry Andric /// vector. 37040b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [8 x i16] with all elements 37050b57cec5SDimitry Andric /// containing the value provided in the operand. 370681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { 37070b57cec5SDimitry Andric return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); 37080b57cec5SDimitry Andric } 37090b57cec5SDimitry Andric 37100b57cec5SDimitry Andric /// Initializes all values in a 128-bit vector of [16 x i8] with the 37110b57cec5SDimitry Andric /// specified 8-bit value. 37120b57cec5SDimitry Andric /// 37130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37140b57cec5SDimitry Andric /// 37150b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37160b57cec5SDimitry Andric /// instruction. 37170b57cec5SDimitry Andric /// 37180b57cec5SDimitry Andric /// \param __b 37190b57cec5SDimitry Andric /// An 8-bit value used to initialize the elements of the destination integer 37200b57cec5SDimitry Andric /// vector. 
37210b57cec5SDimitry Andric /// \returns An initialized 128-bit vector of [16 x i8] with all elements 37220b57cec5SDimitry Andric /// containing the value provided in the operand. 372381ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { 372481ad6265SDimitry Andric return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, 372581ad6265SDimitry Andric __b, __b, __b, __b, __b); 37260b57cec5SDimitry Andric } 37270b57cec5SDimitry Andric 37280b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 37290b57cec5SDimitry Andric /// with the specified 64-bit integral values. 37300b57cec5SDimitry Andric /// 37310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37320b57cec5SDimitry Andric /// 37330b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 37340b57cec5SDimitry Andric /// 37350b57cec5SDimitry Andric /// \param __q0 37360b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the lower 64 bits of the 37370b57cec5SDimitry Andric /// result. 37380b57cec5SDimitry Andric /// \param __q1 37390b57cec5SDimitry Andric /// A 64-bit integral value used to initialize the upper 64 bits of the 37400b57cec5SDimitry Andric /// result. 37410b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 374281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, 374381ad6265SDimitry Andric __m64 __q1) { 37440b57cec5SDimitry Andric return _mm_set_epi64(__q1, __q0); 37450b57cec5SDimitry Andric } 37460b57cec5SDimitry Andric 37470b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 37480b57cec5SDimitry Andric /// with the specified 32-bit integral values. 
37490b57cec5SDimitry Andric /// 37500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37510b57cec5SDimitry Andric /// 37520b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37530b57cec5SDimitry Andric /// instruction. 37540b57cec5SDimitry Andric /// 37550b57cec5SDimitry Andric /// \param __i0 37560b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [31:0] of the result. 37570b57cec5SDimitry Andric /// \param __i1 37580b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [63:32] of the result. 37590b57cec5SDimitry Andric /// \param __i2 37600b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [95:64] of the result. 37610b57cec5SDimitry Andric /// \param __i3 37620b57cec5SDimitry Andric /// A 32-bit integral value used to initialize bits [127:96] of the result. 37630b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 376481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, 376581ad6265SDimitry Andric int __i2, 376681ad6265SDimitry Andric int __i3) { 37670b57cec5SDimitry Andric return _mm_set_epi32(__i3, __i2, __i1, __i0); 37680b57cec5SDimitry Andric } 37690b57cec5SDimitry Andric 37700b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 37710b57cec5SDimitry Andric /// with the specified 16-bit integral values. 37720b57cec5SDimitry Andric /// 37730b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 37740b57cec5SDimitry Andric /// 37750b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 37760b57cec5SDimitry Andric /// instruction. 37770b57cec5SDimitry Andric /// 37780b57cec5SDimitry Andric /// \param __w0 37790b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [15:0] of the result. 
37800b57cec5SDimitry Andric /// \param __w1 37810b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [31:16] of the result. 37820b57cec5SDimitry Andric /// \param __w2 37830b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [47:32] of the result. 37840b57cec5SDimitry Andric /// \param __w3 37850b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [63:48] of the result. 37860b57cec5SDimitry Andric /// \param __w4 37870b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [79:64] of the result. 37880b57cec5SDimitry Andric /// \param __w5 37890b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [95:80] of the result. 37900b57cec5SDimitry Andric /// \param __w6 37910b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [111:96] of the result. 37920b57cec5SDimitry Andric /// \param __w7 37930b57cec5SDimitry Andric /// A 16-bit integral value used to initialize bits [127:112] of the result. 37940b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 37950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 379681ad6265SDimitry Andric _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, 379781ad6265SDimitry Andric short __w5, short __w6, short __w7) { 37980b57cec5SDimitry Andric return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); 37990b57cec5SDimitry Andric } 38000b57cec5SDimitry Andric 38010b57cec5SDimitry Andric /// Constructs a 128-bit integer vector, initialized in reverse order 38020b57cec5SDimitry Andric /// with the specified 8-bit integral values. 38030b57cec5SDimitry Andric /// 38040b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38050b57cec5SDimitry Andric /// 38060b57cec5SDimitry Andric /// This intrinsic is a utility function and does not correspond to a specific 38070b57cec5SDimitry Andric /// instruction. 
38080b57cec5SDimitry Andric /// 38090b57cec5SDimitry Andric /// \param __b0 38100b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [7:0] of the result. 38110b57cec5SDimitry Andric /// \param __b1 38120b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [15:8] of the result. 38130b57cec5SDimitry Andric /// \param __b2 38140b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [23:16] of the result. 38150b57cec5SDimitry Andric /// \param __b3 38160b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [31:24] of the result. 38170b57cec5SDimitry Andric /// \param __b4 38180b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [39:32] of the result. 38190b57cec5SDimitry Andric /// \param __b5 38200b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [47:40] of the result. 38210b57cec5SDimitry Andric /// \param __b6 38220b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [55:48] of the result. 38230b57cec5SDimitry Andric /// \param __b7 38240b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [63:56] of the result. 38250b57cec5SDimitry Andric /// \param __b8 38260b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [71:64] of the result. 38270b57cec5SDimitry Andric /// \param __b9 38280b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [79:72] of the result. 38290b57cec5SDimitry Andric /// \param __b10 38300b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [87:80] of the result. 38310b57cec5SDimitry Andric /// \param __b11 38320b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [95:88] of the result. 38330b57cec5SDimitry Andric /// \param __b12 38340b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [103:96] of the result. 
38350b57cec5SDimitry Andric /// \param __b13 38360b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [111:104] of the result. 38370b57cec5SDimitry Andric /// \param __b14 38380b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [119:112] of the result. 38390b57cec5SDimitry Andric /// \param __b15 38400b57cec5SDimitry Andric /// An 8-bit integral value used to initialize bits [127:120] of the result. 38410b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector. 38420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 384381ad6265SDimitry Andric _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 384481ad6265SDimitry Andric char __b6, char __b7, char __b8, char __b9, char __b10, 384581ad6265SDimitry Andric char __b11, char __b12, char __b13, char __b14, char __b15) { 384681ad6265SDimitry Andric return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, 384781ad6265SDimitry Andric __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 38480b57cec5SDimitry Andric } 38490b57cec5SDimitry Andric 38500b57cec5SDimitry Andric /// Creates a 128-bit integer vector initialized to zero. 38510b57cec5SDimitry Andric /// 38520b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38530b57cec5SDimitry Andric /// 38540b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction. 38550b57cec5SDimitry Andric /// 38560b57cec5SDimitry Andric /// \returns An initialized 128-bit integer vector with all elements set to 38570b57cec5SDimitry Andric /// zero. 385881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void) { 38590b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){0LL, 0LL}; 38600b57cec5SDimitry Andric } 38610b57cec5SDimitry Andric 38620b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a memory location aligned on a 38630b57cec5SDimitry Andric /// 128-bit boundary. 
38640b57cec5SDimitry Andric /// 38650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38660b57cec5SDimitry Andric /// 38670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction. 38680b57cec5SDimitry Andric /// 38690b57cec5SDimitry Andric /// \param __p 38700b57cec5SDimitry Andric /// A pointer to an aligned memory location that will receive the integer 38710b57cec5SDimitry Andric /// values. 38720b57cec5SDimitry Andric /// \param __b 38730b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 387481ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, 387581ad6265SDimitry Andric __m128i __b) { 38760b57cec5SDimitry Andric *__p = __b; 38770b57cec5SDimitry Andric } 38780b57cec5SDimitry Andric 38790b57cec5SDimitry Andric /// Stores a 128-bit integer vector to an unaligned memory location. 38800b57cec5SDimitry Andric /// 38810b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 38820b57cec5SDimitry Andric /// 38830b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction. 38840b57cec5SDimitry Andric /// 38850b57cec5SDimitry Andric /// \param __p 38860b57cec5SDimitry Andric /// A pointer to a memory location that will receive the integer values. 38870b57cec5SDimitry Andric /// \param __b 38880b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 
388981ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p, 389081ad6265SDimitry Andric __m128i __b) { 38910b57cec5SDimitry Andric struct __storeu_si128 { 38920b57cec5SDimitry Andric __m128i_u __v; 38930b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 38940b57cec5SDimitry Andric ((struct __storeu_si128 *)__p)->__v = __b; 38950b57cec5SDimitry Andric } 38960b57cec5SDimitry Andric 38970b57cec5SDimitry Andric /// Stores a 64-bit integer value from the low element of a 128-bit integer 38980b57cec5SDimitry Andric /// vector. 38990b57cec5SDimitry Andric /// 39000b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39010b57cec5SDimitry Andric /// 39020b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 39030b57cec5SDimitry Andric /// 39040b57cec5SDimitry Andric /// \param __p 39050b57cec5SDimitry Andric /// A pointer to a 64-bit memory location. The address of the memory 3906e8d8bef9SDimitry Andric /// location does not have to be aligned. 39070b57cec5SDimitry Andric /// \param __b 39080b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 390981ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si64(void *__p, 391081ad6265SDimitry Andric __m128i __b) { 39110b57cec5SDimitry Andric struct __storeu_si64 { 39120b57cec5SDimitry Andric long long __v; 39130b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 39140b57cec5SDimitry Andric ((struct __storeu_si64 *)__p)->__v = ((__v2di)__b)[0]; 39150b57cec5SDimitry Andric } 39160b57cec5SDimitry Andric 39170b57cec5SDimitry Andric /// Stores a 32-bit integer value from the low element of a 128-bit integer 39180b57cec5SDimitry Andric /// vector. 
39190b57cec5SDimitry Andric /// 39200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39210b57cec5SDimitry Andric /// 39220b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 39230b57cec5SDimitry Andric /// 39240b57cec5SDimitry Andric /// \param __p 39250b57cec5SDimitry Andric /// A pointer to a 32-bit memory location. The address of the memory 39260b57cec5SDimitry Andric /// location does not have to be aligned. 39270b57cec5SDimitry Andric /// \param __b 39280b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 392981ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si32(void *__p, 393081ad6265SDimitry Andric __m128i __b) { 39310b57cec5SDimitry Andric struct __storeu_si32 { 39320b57cec5SDimitry Andric int __v; 39330b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 39340b57cec5SDimitry Andric ((struct __storeu_si32 *)__p)->__v = ((__v4si)__b)[0]; 39350b57cec5SDimitry Andric } 39360b57cec5SDimitry Andric 39370b57cec5SDimitry Andric /// Stores a 16-bit integer value from the low element of a 128-bit integer 39380b57cec5SDimitry Andric /// vector. 39390b57cec5SDimitry Andric /// 39400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39410b57cec5SDimitry Andric /// 39420b57cec5SDimitry Andric /// This intrinsic does not correspond to a specific instruction. 39430b57cec5SDimitry Andric /// 39440b57cec5SDimitry Andric /// \param __p 39450b57cec5SDimitry Andric /// A pointer to a 16-bit memory location. The address of the memory 39460b57cec5SDimitry Andric /// location does not have to be aligned. 39470b57cec5SDimitry Andric /// \param __b 39480b57cec5SDimitry Andric /// A 128-bit integer vector containing the value to be stored. 
394981ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p, 395081ad6265SDimitry Andric __m128i __b) { 39510b57cec5SDimitry Andric struct __storeu_si16 { 39520b57cec5SDimitry Andric short __v; 39530b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 39540b57cec5SDimitry Andric ((struct __storeu_si16 *)__p)->__v = ((__v8hi)__b)[0]; 39550b57cec5SDimitry Andric } 39560b57cec5SDimitry Andric 39570b57cec5SDimitry Andric /// Moves bytes selected by the mask from the first operand to the 39580b57cec5SDimitry Andric /// specified unaligned memory location. When a mask bit is 1, the 39590b57cec5SDimitry Andric /// corresponding byte is written, otherwise it is not written. 39600b57cec5SDimitry Andric /// 39610b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 39620b57cec5SDimitry Andric /// used again soon). Exception and trap behavior for elements not selected 39630b57cec5SDimitry Andric /// for storage to memory are implementation dependent. 39640b57cec5SDimitry Andric /// 39650b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39660b57cec5SDimitry Andric /// 39670b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c> 39680b57cec5SDimitry Andric /// instruction. 39690b57cec5SDimitry Andric /// 39700b57cec5SDimitry Andric /// \param __d 39710b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be moved. 39720b57cec5SDimitry Andric /// \param __n 39730b57cec5SDimitry Andric /// A 128-bit integer vector containing the mask. The most significant bit of 39740b57cec5SDimitry Andric /// each byte represents the mask bits. 39750b57cec5SDimitry Andric /// \param __p 39760b57cec5SDimitry Andric /// A pointer to an unaligned 128-bit memory location where the specified 39770b57cec5SDimitry Andric /// values are moved. 
397881ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, 397981ad6265SDimitry Andric __m128i __n, 398081ad6265SDimitry Andric char *__p) { 39810b57cec5SDimitry Andric __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); 39820b57cec5SDimitry Andric } 39830b57cec5SDimitry Andric 39840b57cec5SDimitry Andric /// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to 39850b57cec5SDimitry Andric /// a memory location. 39860b57cec5SDimitry Andric /// 39870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 39880b57cec5SDimitry Andric /// 39890b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction. 39900b57cec5SDimitry Andric /// 39910b57cec5SDimitry Andric /// \param __p 39920b57cec5SDimitry Andric /// A pointer to a 64-bit memory location that will receive the lower 64 bits 39930b57cec5SDimitry Andric /// of the integer vector parameter. 39940b57cec5SDimitry Andric /// \param __a 39950b57cec5SDimitry Andric /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the 39960b57cec5SDimitry Andric /// value to be stored. 399781ad6265SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, 399881ad6265SDimitry Andric __m128i __a) { 39990b57cec5SDimitry Andric struct __mm_storel_epi64_struct { 40000b57cec5SDimitry Andric long long __u; 40010b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 40020b57cec5SDimitry Andric ((struct __mm_storel_epi64_struct *)__p)->__u = __a[0]; 40030b57cec5SDimitry Andric } 40040b57cec5SDimitry Andric 40050b57cec5SDimitry Andric /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit 40060b57cec5SDimitry Andric /// aligned memory location. 40070b57cec5SDimitry Andric /// 40080b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 40090b57cec5SDimitry Andric /// used again soon). 
40100b57cec5SDimitry Andric /// 40110b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40120b57cec5SDimitry Andric /// 40130b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 40140b57cec5SDimitry Andric /// 40150b57cec5SDimitry Andric /// \param __p 40160b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 40170b57cec5SDimitry Andric /// \param __a 40180b57cec5SDimitry Andric /// A vector of [2 x double] containing the 64-bit values to be stored. 40195f757f3fSDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p, 402081ad6265SDimitry Andric __m128d __a) { 40210b57cec5SDimitry Andric __builtin_nontemporal_store((__v2df)__a, (__v2df *)__p); 40220b57cec5SDimitry Andric } 40230b57cec5SDimitry Andric 40240b57cec5SDimitry Andric /// Stores a 128-bit integer vector to a 128-bit aligned memory location. 40250b57cec5SDimitry Andric /// 40260b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 40270b57cec5SDimitry Andric /// used again soon). 40280b57cec5SDimitry Andric /// 40290b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40300b57cec5SDimitry Andric /// 40310b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction. 40320b57cec5SDimitry Andric /// 40330b57cec5SDimitry Andric /// \param __p 40340b57cec5SDimitry Andric /// A pointer to the 128-bit aligned memory location used to store the value. 40350b57cec5SDimitry Andric /// \param __a 40360b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be stored. 
40375f757f3fSDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p, 403881ad6265SDimitry Andric __m128i __a) { 40390b57cec5SDimitry Andric __builtin_nontemporal_store((__v2di)__a, (__v2di *)__p); 40400b57cec5SDimitry Andric } 40410b57cec5SDimitry Andric 40420b57cec5SDimitry Andric /// Stores a 32-bit integer value in the specified memory location. 40430b57cec5SDimitry Andric /// 40440b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 40450b57cec5SDimitry Andric /// used again soon). 40460b57cec5SDimitry Andric /// 40470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40480b57cec5SDimitry Andric /// 40490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTI </c> instruction. 40500b57cec5SDimitry Andric /// 40510b57cec5SDimitry Andric /// \param __p 40520b57cec5SDimitry Andric /// A pointer to the 32-bit memory location used to store the value. 40530b57cec5SDimitry Andric /// \param __a 40540b57cec5SDimitry Andric /// A 32-bit integer containing the value to be stored. 405581ad6265SDimitry Andric static __inline__ void 405681ad6265SDimitry Andric __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) 40575f757f3fSDimitry Andric _mm_stream_si32(void *__p, int __a) { 40585f757f3fSDimitry Andric __builtin_ia32_movnti((int *)__p, __a); 40590b57cec5SDimitry Andric } 40600b57cec5SDimitry Andric 40610b57cec5SDimitry Andric #ifdef __x86_64__ 40620b57cec5SDimitry Andric /// Stores a 64-bit integer value in the specified memory location. 40630b57cec5SDimitry Andric /// 40640b57cec5SDimitry Andric /// To minimize caching, the data is flagged as non-temporal (unlikely to be 40650b57cec5SDimitry Andric /// used again soon). 40660b57cec5SDimitry Andric /// 40670b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40680b57cec5SDimitry Andric /// 40690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction. 
40700b57cec5SDimitry Andric /// 40710b57cec5SDimitry Andric /// \param __p 40720b57cec5SDimitry Andric /// A pointer to the 64-bit memory location used to store the value. 40730b57cec5SDimitry Andric /// \param __a 40740b57cec5SDimitry Andric /// A 64-bit integer containing the value to be stored. 407581ad6265SDimitry Andric static __inline__ void 407681ad6265SDimitry Andric __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) 40775f757f3fSDimitry Andric _mm_stream_si64(void *__p, long long __a) { 40785f757f3fSDimitry Andric __builtin_ia32_movnti64((long long *)__p, __a); 40790b57cec5SDimitry Andric } 40800b57cec5SDimitry Andric #endif 40810b57cec5SDimitry Andric 40820b57cec5SDimitry Andric #if defined(__cplusplus) 40830b57cec5SDimitry Andric extern "C" { 40840b57cec5SDimitry Andric #endif 40850b57cec5SDimitry Andric 40860b57cec5SDimitry Andric /// The cache line containing \a __p is flushed and invalidated from all 40870b57cec5SDimitry Andric /// caches in the coherency domain. 40880b57cec5SDimitry Andric /// 40890b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 40900b57cec5SDimitry Andric /// 40910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> CLFLUSH </c> instruction. 40920b57cec5SDimitry Andric /// 40930b57cec5SDimitry Andric /// \param __p 40940b57cec5SDimitry Andric /// A pointer to the memory location used to identify the cache line to be 40950b57cec5SDimitry Andric /// flushed. 40960b57cec5SDimitry Andric void _mm_clflush(void const *__p); 40970b57cec5SDimitry Andric 40980b57cec5SDimitry Andric /// Forces strong memory ordering (serialization) between load 40990b57cec5SDimitry Andric /// instructions preceding this instruction and load instructions following 41000b57cec5SDimitry Andric /// this instruction, ensuring the system completes all previous loads before 41010b57cec5SDimitry Andric /// executing subsequent loads. 
41020b57cec5SDimitry Andric /// 41030b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41040b57cec5SDimitry Andric /// 41050b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> LFENCE </c> instruction. 41060b57cec5SDimitry Andric /// 41070b57cec5SDimitry Andric void _mm_lfence(void); 41080b57cec5SDimitry Andric 41090b57cec5SDimitry Andric /// Forces strong memory ordering (serialization) between load and store 41100b57cec5SDimitry Andric /// instructions preceding this instruction and load and store instructions 41110b57cec5SDimitry Andric /// following this instruction, ensuring that the system completes all 41120b57cec5SDimitry Andric /// previous memory accesses before executing subsequent memory accesses. 41130b57cec5SDimitry Andric /// 41140b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41150b57cec5SDimitry Andric /// 41160b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MFENCE </c> instruction. 41170b57cec5SDimitry Andric /// 41180b57cec5SDimitry Andric void _mm_mfence(void); 41190b57cec5SDimitry Andric 41200b57cec5SDimitry Andric #if defined(__cplusplus) 41210b57cec5SDimitry Andric } // extern "C" 41220b57cec5SDimitry Andric #endif 41230b57cec5SDimitry Andric 41240fca6ea1SDimitry Andric /// Converts, with saturation, 16-bit signed integers from both 128-bit integer 41250fca6ea1SDimitry Andric /// vector operands into 8-bit signed integers, and packs the results into 41260fca6ea1SDimitry Andric /// the destination. 41270fca6ea1SDimitry Andric /// 41280fca6ea1SDimitry Andric /// Positive values greater than 0x7F are saturated to 0x7F. Negative values 41290fca6ea1SDimitry Andric /// less than 0x80 are saturated to 0x80. 41300b57cec5SDimitry Andric /// 41310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41320b57cec5SDimitry Andric /// 41330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction. 
41340b57cec5SDimitry Andric /// 41350b57cec5SDimitry Andric /// \param __a 41360fca6ea1SDimitry Andric /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are 41370b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 41380b57cec5SDimitry Andric /// \param __b 41390fca6ea1SDimitry Andric /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are 41400b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 41410b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 414281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, 414381ad6265SDimitry Andric __m128i __b) { 41440b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); 41450b57cec5SDimitry Andric } 41460b57cec5SDimitry Andric 41470fca6ea1SDimitry Andric /// Converts, with saturation, 32-bit signed integers from both 128-bit integer 41480fca6ea1SDimitry Andric /// vector operands into 16-bit signed integers, and packs the results into 41490fca6ea1SDimitry Andric /// the destination. 41500fca6ea1SDimitry Andric /// 41510fca6ea1SDimitry Andric /// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative 41520fca6ea1SDimitry Andric /// values less than 0x8000 are saturated to 0x8000. 41530b57cec5SDimitry Andric /// 41540b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41550b57cec5SDimitry Andric /// 41560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction. 41570b57cec5SDimitry Andric /// 41580b57cec5SDimitry Andric /// \param __a 41590fca6ea1SDimitry Andric /// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values 41600b57cec5SDimitry Andric /// are written to the lower 64 bits of the result. 41610b57cec5SDimitry Andric /// \param __b 41620fca6ea1SDimitry Andric /// A 128-bit integer vector of [4 x i32]. 
The converted [4 x i16] values 41630b57cec5SDimitry Andric /// are written to the higher 64 bits of the result. 41640b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the converted values. 416581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, 416681ad6265SDimitry Andric __m128i __b) { 41670b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); 41680b57cec5SDimitry Andric } 41690b57cec5SDimitry Andric 41700fca6ea1SDimitry Andric /// Converts, with saturation, 16-bit signed integers from both 128-bit integer 41710fca6ea1SDimitry Andric /// vector operands into 8-bit unsigned integers, and packs the results into 41720fca6ea1SDimitry Andric /// the destination. 41730fca6ea1SDimitry Andric /// 41740fca6ea1SDimitry Andric /// Values greater than 0xFF are saturated to 0xFF. Values less than 0x00 41750fca6ea1SDimitry Andric /// are saturated to 0x00. 41760b57cec5SDimitry Andric /// 41770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41780b57cec5SDimitry Andric /// 41790b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction. 41800b57cec5SDimitry Andric /// 41810b57cec5SDimitry Andric /// \param __a 41820fca6ea1SDimitry Andric /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are 41830b57cec5SDimitry Andric /// written to the lower 64 bits of the result. 41840b57cec5SDimitry Andric /// \param __b 41850fca6ea1SDimitry Andric /// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are 41860b57cec5SDimitry Andric /// written to the higher 64 bits of the result. 41870b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the converted values. 
418881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, 418981ad6265SDimitry Andric __m128i __b) { 41900b57cec5SDimitry Andric return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); 41910b57cec5SDimitry Andric } 41920b57cec5SDimitry Andric 41930b57cec5SDimitry Andric /// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using 41940b57cec5SDimitry Andric /// the immediate-value parameter as a selector. 41950b57cec5SDimitry Andric /// 41960b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 41970b57cec5SDimitry Andric /// 419881ad6265SDimitry Andric /// \code 419981ad6265SDimitry Andric /// __m128i _mm_extract_epi16(__m128i a, const int imm); 420081ad6265SDimitry Andric /// \endcode 420181ad6265SDimitry Andric /// 42020b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction. 42030b57cec5SDimitry Andric /// 420481ad6265SDimitry Andric /// \param a 42050b57cec5SDimitry Andric /// A 128-bit integer vector. 420681ad6265SDimitry Andric /// \param imm 420781ad6265SDimitry Andric /// An immediate value. Bits [2:0] selects values from \a a to be assigned 42080b57cec5SDimitry Andric /// to bits[15:0] of the result. \n 420981ad6265SDimitry Andric /// 000: assign values from bits [15:0] of \a a. \n 421081ad6265SDimitry Andric /// 001: assign values from bits [31:16] of \a a. \n 421181ad6265SDimitry Andric /// 010: assign values from bits [47:32] of \a a. \n 421281ad6265SDimitry Andric /// 011: assign values from bits [63:48] of \a a. \n 421381ad6265SDimitry Andric /// 100: assign values from bits [79:64] of \a a. \n 421481ad6265SDimitry Andric /// 101: assign values from bits [95:80] of \a a. \n 421581ad6265SDimitry Andric /// 110: assign values from bits [111:96] of \a a. \n 421681ad6265SDimitry Andric /// 111: assign values from bits [127:112] of \a a. 
42170b57cec5SDimitry Andric /// \returns An integer, whose lower 16 bits are selected from the 128-bit 42180b57cec5SDimitry Andric /// integer vector parameter and the remaining bits are assigned zeros. 42190b57cec5SDimitry Andric #define _mm_extract_epi16(a, imm) \ 4220349cc55cSDimitry Andric ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \ 4221349cc55cSDimitry Andric (int)(imm))) 42220b57cec5SDimitry Andric 42230b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by first making a copy of the 42240b57cec5SDimitry Andric /// 128-bit integer vector parameter, and then inserting the lower 16 bits 42250b57cec5SDimitry Andric /// of an integer parameter into an offset specified by the immediate-value 42260b57cec5SDimitry Andric /// parameter. 42270b57cec5SDimitry Andric /// 42280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 42290b57cec5SDimitry Andric /// 423081ad6265SDimitry Andric /// \code 423181ad6265SDimitry Andric /// __m128i _mm_insert_epi16(__m128i a, int b, const int imm); 423281ad6265SDimitry Andric /// \endcode 423381ad6265SDimitry Andric /// 42340b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction. 42350b57cec5SDimitry Andric /// 423681ad6265SDimitry Andric /// \param a 42370b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. This vector is copied to the 42380b57cec5SDimitry Andric /// result and then one of the eight elements in the result is replaced by 423981ad6265SDimitry Andric /// the lower 16 bits of \a b. 424081ad6265SDimitry Andric /// \param b 42410b57cec5SDimitry Andric /// An integer. The lower 16 bits of this parameter are written to the 424281ad6265SDimitry Andric /// result beginning at an offset specified by \a imm. 424381ad6265SDimitry Andric /// \param imm 42440b57cec5SDimitry Andric /// An immediate value specifying the bit offset in the result at which the 424581ad6265SDimitry Andric /// lower 16 bits of \a b are written. 
42460b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the constructed values. 42470b57cec5SDimitry Andric #define _mm_insert_epi16(a, b, imm) \ 4248349cc55cSDimitry Andric ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ 4249349cc55cSDimitry Andric (int)(imm))) 42500b57cec5SDimitry Andric 42510b57cec5SDimitry Andric /// Copies the values of the most significant bits from each 8-bit 42520b57cec5SDimitry Andric /// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask 42530b57cec5SDimitry Andric /// value, zero-extends the value, and writes it to the destination. 42540b57cec5SDimitry Andric /// 42550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 42560b57cec5SDimitry Andric /// 42570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction. 42580b57cec5SDimitry Andric /// 42590b57cec5SDimitry Andric /// \param __a 42600b57cec5SDimitry Andric /// A 128-bit integer vector containing the values with bits to be extracted. 42610b57cec5SDimitry Andric /// \returns The most significant bits from each 8-bit element in \a __a, 42620b57cec5SDimitry Andric /// written to bits [15:0]. The other bits are assigned zeros. 426381ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { 42640b57cec5SDimitry Andric return __builtin_ia32_pmovmskb128((__v16qi)__a); 42650b57cec5SDimitry Andric } 42660b57cec5SDimitry Andric 42670b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four 32-bit 42680b57cec5SDimitry Andric /// elements of a 128-bit integer vector parameter, using the immediate-value 42690b57cec5SDimitry Andric /// parameter as a specifier. 
42700b57cec5SDimitry Andric /// 42710b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 42720b57cec5SDimitry Andric /// 42730b57cec5SDimitry Andric /// \code 42740b57cec5SDimitry Andric /// __m128i _mm_shuffle_epi32(__m128i a, const int imm); 42750b57cec5SDimitry Andric /// \endcode 42760b57cec5SDimitry Andric /// 42770b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction. 42780b57cec5SDimitry Andric /// 42790b57cec5SDimitry Andric /// \param a 42800b57cec5SDimitry Andric /// A 128-bit integer vector containing the values to be copied. 42810b57cec5SDimitry Andric /// \param imm 42820b57cec5SDimitry Andric /// An immediate value containing an 8-bit value specifying which elements to 42830b57cec5SDimitry Andric /// copy from a. The destinations within the 128-bit destination are assigned 42840b57cec5SDimitry Andric /// values as follows: \n 42850b57cec5SDimitry Andric /// Bits [1:0] are used to assign values to bits [31:0] of the result. \n 42860b57cec5SDimitry Andric /// Bits [3:2] are used to assign values to bits [63:32] of the result. \n 42870b57cec5SDimitry Andric /// Bits [5:4] are used to assign values to bits [95:64] of the result. \n 42880b57cec5SDimitry Andric /// Bits [7:6] are used to assign values to bits [127:96] of the result. \n 42890b57cec5SDimitry Andric /// Bit value assignments: \n 42900b57cec5SDimitry Andric /// 00: assign values from bits [31:0] of \a a. \n 42910b57cec5SDimitry Andric /// 01: assign values from bits [63:32] of \a a. \n 42920b57cec5SDimitry Andric /// 10: assign values from bits [95:64] of \a a. \n 429381ad6265SDimitry Andric /// 11: assign values from bits [127:96] of \a a. \n 429481ad6265SDimitry Andric /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. 429581ad6265SDimitry Andric /// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form 429681ad6265SDimitry Andric /// <c>[b6, b4, b2, b0]</c>. 
42970b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values. 42980b57cec5SDimitry Andric #define _mm_shuffle_epi32(a, imm) \ 4299349cc55cSDimitry Andric ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))) 43000b57cec5SDimitry Andric 43010b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four lower 16-bit 43020b57cec5SDimitry Andric /// elements of a 128-bit integer vector of [8 x i16], using the immediate 43030b57cec5SDimitry Andric /// value parameter as a specifier. 43040b57cec5SDimitry Andric /// 43050b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43060b57cec5SDimitry Andric /// 43070b57cec5SDimitry Andric /// \code 43080b57cec5SDimitry Andric /// __m128i _mm_shufflelo_epi16(__m128i a, const int imm); 43090b57cec5SDimitry Andric /// \endcode 43100b57cec5SDimitry Andric /// 43110b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction. 43120b57cec5SDimitry Andric /// 43130b57cec5SDimitry Andric /// \param a 43140b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits 43150b57cec5SDimitry Andric /// [127:64] of the result. 43160b57cec5SDimitry Andric /// \param imm 43170b57cec5SDimitry Andric /// An 8-bit immediate value specifying which elements to copy from \a a. \n 43180b57cec5SDimitry Andric /// Bits[1:0] are used to assign values to bits [15:0] of the result. \n 43190b57cec5SDimitry Andric /// Bits[3:2] are used to assign values to bits [31:16] of the result. \n 43200b57cec5SDimitry Andric /// Bits[5:4] are used to assign values to bits [47:32] of the result. \n 43210b57cec5SDimitry Andric /// Bits[7:6] are used to assign values to bits [63:48] of the result. \n 43220b57cec5SDimitry Andric /// Bit value assignments: \n 43230b57cec5SDimitry Andric /// 00: assign values from bits [15:0] of \a a. \n 43240b57cec5SDimitry Andric /// 01: assign values from bits [31:16] of \a a. 
\n 43250b57cec5SDimitry Andric /// 10: assign values from bits [47:32] of \a a. \n 43260b57cec5SDimitry Andric /// 11: assign values from bits [63:48] of \a a. \n 432781ad6265SDimitry Andric /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. 432881ad6265SDimitry Andric /// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form 432981ad6265SDimitry Andric /// <c>[b6, b4, b2, b0]</c>. 43300b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values. 43310b57cec5SDimitry Andric #define _mm_shufflelo_epi16(a, imm) \ 4332349cc55cSDimitry Andric ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))) 43330b57cec5SDimitry Andric 43340b57cec5SDimitry Andric /// Constructs a 128-bit integer vector by shuffling four upper 16-bit 43350b57cec5SDimitry Andric /// elements of a 128-bit integer vector of [8 x i16], using the immediate 43360b57cec5SDimitry Andric /// value parameter as a specifier. 43370b57cec5SDimitry Andric /// 43380b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43390b57cec5SDimitry Andric /// 43400b57cec5SDimitry Andric /// \code 43410b57cec5SDimitry Andric /// __m128i _mm_shufflehi_epi16(__m128i a, const int imm); 43420b57cec5SDimitry Andric /// \endcode 43430b57cec5SDimitry Andric /// 43440b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction. 43450b57cec5SDimitry Andric /// 43460b57cec5SDimitry Andric /// \param a 43470b57cec5SDimitry Andric /// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits 43480b57cec5SDimitry Andric /// [63:0] of the result. 43490b57cec5SDimitry Andric /// \param imm 43500b57cec5SDimitry Andric /// An 8-bit immediate value specifying which elements to copy from \a a. \n 43510b57cec5SDimitry Andric /// Bits[1:0] are used to assign values to bits [79:64] of the result. \n 43520b57cec5SDimitry Andric /// Bits[3:2] are used to assign values to bits [95:80] of the result. 
\n 43530b57cec5SDimitry Andric /// Bits[5:4] are used to assign values to bits [111:96] of the result. \n 43540b57cec5SDimitry Andric /// Bits[7:6] are used to assign values to bits [127:112] of the result. \n 43550b57cec5SDimitry Andric /// Bit value assignments: \n 43560b57cec5SDimitry Andric /// 00: assign values from bits [79:64] of \a a. \n 43570b57cec5SDimitry Andric /// 01: assign values from bits [95:80] of \a a. \n 43580b57cec5SDimitry Andric /// 10: assign values from bits [111:96] of \a a. \n 43590b57cec5SDimitry Andric /// 11: assign values from bits [127:112] of \a a. \n 436081ad6265SDimitry Andric /// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. 436181ad6265SDimitry Andric /// <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form 436281ad6265SDimitry Andric /// <c>[b6, b4, b2, b0]</c>. 43630b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the shuffled values. 43640b57cec5SDimitry Andric #define _mm_shufflehi_epi16(a, imm) \ 4365349cc55cSDimitry Andric ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))) 43660b57cec5SDimitry Andric 43670b57cec5SDimitry Andric /// Unpacks the high-order (index 8-15) values from two 128-bit vectors 43680b57cec5SDimitry Andric /// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. 43690b57cec5SDimitry Andric /// 43700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 43710b57cec5SDimitry Andric /// 43720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c> 43730b57cec5SDimitry Andric /// instruction. 43740b57cec5SDimitry Andric /// 43750b57cec5SDimitry Andric /// \param __a 43760b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 43770b57cec5SDimitry Andric /// Bits [71:64] are written to bits [7:0] of the result. \n 43780b57cec5SDimitry Andric /// Bits [79:72] are written to bits [23:16] of the result. 
\n 43790b57cec5SDimitry Andric /// Bits [87:80] are written to bits [39:32] of the result. \n 43800b57cec5SDimitry Andric /// Bits [95:88] are written to bits [55:48] of the result. \n 43810b57cec5SDimitry Andric /// Bits [103:96] are written to bits [71:64] of the result. \n 43820b57cec5SDimitry Andric /// Bits [111:104] are written to bits [87:80] of the result. \n 43830b57cec5SDimitry Andric /// Bits [119:112] are written to bits [103:96] of the result. \n 43840b57cec5SDimitry Andric /// Bits [127:120] are written to bits [119:112] of the result. 43850b57cec5SDimitry Andric /// \param __b 43860b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. \n 43870b57cec5SDimitry Andric /// Bits [71:64] are written to bits [15:8] of the result. \n 43880b57cec5SDimitry Andric /// Bits [79:72] are written to bits [31:24] of the result. \n 43890b57cec5SDimitry Andric /// Bits [87:80] are written to bits [47:40] of the result. \n 43900b57cec5SDimitry Andric /// Bits [95:88] are written to bits [63:56] of the result. \n 43910b57cec5SDimitry Andric /// Bits [103:96] are written to bits [79:72] of the result. \n 43920b57cec5SDimitry Andric /// Bits [111:104] are written to bits [95:88] of the result. \n 43930b57cec5SDimitry Andric /// Bits [119:112] are written to bits [111:104] of the result. \n 43940b57cec5SDimitry Andric /// Bits [127:120] are written to bits [127:120] of the result. 43950b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. 
439681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, 439781ad6265SDimitry Andric __m128i __b) { 439881ad6265SDimitry Andric return (__m128i)__builtin_shufflevector( 439981ad6265SDimitry Andric (__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11, 440081ad6265SDimitry Andric 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15); 44010b57cec5SDimitry Andric } 44020b57cec5SDimitry Andric 44030b57cec5SDimitry Andric /// Unpacks the high-order (index 4-7) values from two 128-bit vectors of 44040b57cec5SDimitry Andric /// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16]. 44050b57cec5SDimitry Andric /// 44060b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44070b57cec5SDimitry Andric /// 44080b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c> 44090b57cec5SDimitry Andric /// instruction. 44100b57cec5SDimitry Andric /// 44110b57cec5SDimitry Andric /// \param __a 44120b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 44130b57cec5SDimitry Andric /// Bits [79:64] are written to bits [15:0] of the result. \n 44140b57cec5SDimitry Andric /// Bits [95:80] are written to bits [47:32] of the result. \n 44150b57cec5SDimitry Andric /// Bits [111:96] are written to bits [79:64] of the result. \n 44160b57cec5SDimitry Andric /// Bits [127:112] are written to bits [111:96] of the result. 44170b57cec5SDimitry Andric /// \param __b 44180b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 44190b57cec5SDimitry Andric /// Bits [79:64] are written to bits [31:16] of the result. \n 44200b57cec5SDimitry Andric /// Bits [95:80] are written to bits [63:48] of the result. \n 44210b57cec5SDimitry Andric /// Bits [111:96] are written to bits [95:80] of the result. \n 44220b57cec5SDimitry Andric /// Bits [127:112] are written to bits [127:112] of the result. 
44230b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 442481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, 442581ad6265SDimitry Andric __m128i __b) { 442681ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5, 442781ad6265SDimitry Andric 8 + 5, 6, 8 + 6, 7, 8 + 7); 44280b57cec5SDimitry Andric } 44290b57cec5SDimitry Andric 44300b57cec5SDimitry Andric /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of 44310b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 44320b57cec5SDimitry Andric /// 44330b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44340b57cec5SDimitry Andric /// 44350b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c> 44360b57cec5SDimitry Andric /// instruction. 44370b57cec5SDimitry Andric /// 44380b57cec5SDimitry Andric /// \param __a 44390b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 44400b57cec5SDimitry Andric /// Bits [95:64] are written to bits [31:0] of the destination. \n 44410b57cec5SDimitry Andric /// Bits [127:96] are written to bits [95:64] of the destination. 44420b57cec5SDimitry Andric /// \param __b 44430b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 44440b57cec5SDimitry Andric /// Bits [95:64] are written to bits [64:32] of the destination. \n 44450b57cec5SDimitry Andric /// Bits [127:96] are written to bits [127:96] of the destination. 44460b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
444781ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, 444881ad6265SDimitry Andric __m128i __b) { 444981ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3, 445081ad6265SDimitry Andric 4 + 3); 44510b57cec5SDimitry Andric } 44520b57cec5SDimitry Andric 44530b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 44540b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 44550b57cec5SDimitry Andric /// 44560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44570b57cec5SDimitry Andric /// 44580b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c> 44590b57cec5SDimitry Andric /// instruction. 44600b57cec5SDimitry Andric /// 44610b57cec5SDimitry Andric /// \param __a 44620b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 44630b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 44640b57cec5SDimitry Andric /// \param __b 44650b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 44660b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 44670b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 446881ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, 446981ad6265SDimitry Andric __m128i __b) { 44700b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1); 44710b57cec5SDimitry Andric } 44720b57cec5SDimitry Andric 44730b57cec5SDimitry Andric /// Unpacks the low-order (index 0-7) values from two 128-bit vectors of 44740b57cec5SDimitry Andric /// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. 
44750b57cec5SDimitry Andric /// 44760b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 44770b57cec5SDimitry Andric /// 44780b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c> 44790b57cec5SDimitry Andric /// instruction. 44800b57cec5SDimitry Andric /// 44810b57cec5SDimitry Andric /// \param __a 44820b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. \n 44830b57cec5SDimitry Andric /// Bits [7:0] are written to bits [7:0] of the result. \n 44840b57cec5SDimitry Andric /// Bits [15:8] are written to bits [23:16] of the result. \n 44850b57cec5SDimitry Andric /// Bits [23:16] are written to bits [39:32] of the result. \n 44860b57cec5SDimitry Andric /// Bits [31:24] are written to bits [55:48] of the result. \n 44870b57cec5SDimitry Andric /// Bits [39:32] are written to bits [71:64] of the result. \n 44880b57cec5SDimitry Andric /// Bits [47:40] are written to bits [87:80] of the result. \n 44890b57cec5SDimitry Andric /// Bits [55:48] are written to bits [103:96] of the result. \n 44900b57cec5SDimitry Andric /// Bits [63:56] are written to bits [119:112] of the result. 44910b57cec5SDimitry Andric /// \param __b 44920b57cec5SDimitry Andric /// A 128-bit vector of [16 x i8]. 44930b57cec5SDimitry Andric /// Bits [7:0] are written to bits [15:8] of the result. \n 44940b57cec5SDimitry Andric /// Bits [15:8] are written to bits [31:24] of the result. \n 44950b57cec5SDimitry Andric /// Bits [23:16] are written to bits [47:40] of the result. \n 44960b57cec5SDimitry Andric /// Bits [31:24] are written to bits [63:56] of the result. \n 44970b57cec5SDimitry Andric /// Bits [39:32] are written to bits [79:72] of the result. \n 44980b57cec5SDimitry Andric /// Bits [47:40] are written to bits [95:88] of the result. \n 44990b57cec5SDimitry Andric /// Bits [55:48] are written to bits [111:104] of the result. \n 45000b57cec5SDimitry Andric /// Bits [63:56] are written to bits [127:120] of the result. 
45010b57cec5SDimitry Andric /// \returns A 128-bit vector of [16 x i8] containing the interleaved values. 450281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, 450381ad6265SDimitry Andric __m128i __b) { 450481ad6265SDimitry Andric return (__m128i)__builtin_shufflevector( 450581ad6265SDimitry Andric (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 450681ad6265SDimitry Andric 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7); 45070b57cec5SDimitry Andric } 45080b57cec5SDimitry Andric 45090b57cec5SDimitry Andric /// Unpacks the low-order (index 0-3) values from each of the two 128-bit 45100b57cec5SDimitry Andric /// vectors of [8 x i16] and interleaves them into a 128-bit vector of 45110b57cec5SDimitry Andric /// [8 x i16]. 45120b57cec5SDimitry Andric /// 45130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45140b57cec5SDimitry Andric /// 45150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c> 45160b57cec5SDimitry Andric /// instruction. 45170b57cec5SDimitry Andric /// 45180b57cec5SDimitry Andric /// \param __a 45190b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 45200b57cec5SDimitry Andric /// Bits [15:0] are written to bits [15:0] of the result. \n 45210b57cec5SDimitry Andric /// Bits [31:16] are written to bits [47:32] of the result. \n 45220b57cec5SDimitry Andric /// Bits [47:32] are written to bits [79:64] of the result. \n 45230b57cec5SDimitry Andric /// Bits [63:48] are written to bits [111:96] of the result. 45240b57cec5SDimitry Andric /// \param __b 45250b57cec5SDimitry Andric /// A 128-bit vector of [8 x i16]. 45260b57cec5SDimitry Andric /// Bits [15:0] are written to bits [31:16] of the result. \n 45270b57cec5SDimitry Andric /// Bits [31:16] are written to bits [63:48] of the result. \n 45280b57cec5SDimitry Andric /// Bits [47:32] are written to bits [95:80] of the result. 
\n 45290b57cec5SDimitry Andric /// Bits [63:48] are written to bits [127:112] of the result. 45300b57cec5SDimitry Andric /// \returns A 128-bit vector of [8 x i16] containing the interleaved values. 453181ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, 453281ad6265SDimitry Andric __m128i __b) { 453381ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1, 453481ad6265SDimitry Andric 8 + 1, 2, 8 + 2, 3, 8 + 3); 45350b57cec5SDimitry Andric } 45360b57cec5SDimitry Andric 45370b57cec5SDimitry Andric /// Unpacks the low-order (index 0,1) values from two 128-bit vectors of 45380b57cec5SDimitry Andric /// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32]. 45390b57cec5SDimitry Andric /// 45400b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45410b57cec5SDimitry Andric /// 45420b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c> 45430b57cec5SDimitry Andric /// instruction. 45440b57cec5SDimitry Andric /// 45450b57cec5SDimitry Andric /// \param __a 45460b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 45470b57cec5SDimitry Andric /// Bits [31:0] are written to bits [31:0] of the destination. \n 45480b57cec5SDimitry Andric /// Bits [63:32] are written to bits [95:64] of the destination. 45490b57cec5SDimitry Andric /// \param __b 45500b57cec5SDimitry Andric /// A 128-bit vector of [4 x i32]. \n 45510b57cec5SDimitry Andric /// Bits [31:0] are written to bits [64:32] of the destination. \n 45520b57cec5SDimitry Andric /// Bits [63:32] are written to bits [127:96] of the destination. 45530b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x i32] containing the interleaved values. 
455481ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, 455581ad6265SDimitry Andric __m128i __b) { 455681ad6265SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1, 455781ad6265SDimitry Andric 4 + 1); 45580b57cec5SDimitry Andric } 45590b57cec5SDimitry Andric 45600b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors of 45610b57cec5SDimitry Andric /// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64]. 45620b57cec5SDimitry Andric /// 45630b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45640b57cec5SDimitry Andric /// 45650b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c> 45660b57cec5SDimitry Andric /// instruction. 45670b57cec5SDimitry Andric /// 45680b57cec5SDimitry Andric /// \param __a 45690b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45700b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. \n 45710b57cec5SDimitry Andric /// \param __b 45720b57cec5SDimitry Andric /// A 128-bit vector of [2 x i64]. \n 45730b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. \n 45740b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x i64] containing the interleaved values. 457581ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, 457681ad6265SDimitry Andric __m128i __b) { 45770b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0); 45780b57cec5SDimitry Andric } 45790b57cec5SDimitry Andric 45800b57cec5SDimitry Andric /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit 45810b57cec5SDimitry Andric /// integer. 
45820b57cec5SDimitry Andric /// 45830b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45840b57cec5SDimitry Andric /// 45850b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction. 45860b57cec5SDimitry Andric /// 45870b57cec5SDimitry Andric /// \param __a 45880b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 45890b57cec5SDimitry Andric /// destination. 45900b57cec5SDimitry Andric /// \returns A 64-bit integer containing the lower 64 bits of the parameter. 459181ad6265SDimitry Andric static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a) { 45920b57cec5SDimitry Andric return (__m64)__a[0]; 45930b57cec5SDimitry Andric } 45940b57cec5SDimitry Andric 45950b57cec5SDimitry Andric /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the 45960b57cec5SDimitry Andric /// upper bits. 45970b57cec5SDimitry Andric /// 45980b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 45990b57cec5SDimitry Andric /// 46000b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction. 46010b57cec5SDimitry Andric /// 46020b57cec5SDimitry Andric /// \param __a 46030b57cec5SDimitry Andric /// A 64-bit value. 46040b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 46050b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 460681ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { 46070b57cec5SDimitry Andric return __extension__(__m128i)(__v2di){(long long)__a, 0}; 46080b57cec5SDimitry Andric } 46090b57cec5SDimitry Andric 46100b57cec5SDimitry Andric /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit 46110b57cec5SDimitry Andric /// integer vector, zeroing the upper bits. 
46120b57cec5SDimitry Andric /// 46130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46140b57cec5SDimitry Andric /// 46150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction. 46160b57cec5SDimitry Andric /// 46170b57cec5SDimitry Andric /// \param __a 46180b57cec5SDimitry Andric /// A 128-bit integer vector operand. The lower 64 bits are moved to the 46190b57cec5SDimitry Andric /// destination. 46200b57cec5SDimitry Andric /// \returns A 128-bit integer vector. The lower 64 bits contain the value from 46210b57cec5SDimitry Andric /// the operand. The upper 64 bits are assigned zeros. 462281ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { 46230b57cec5SDimitry Andric return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); 46240b57cec5SDimitry Andric } 46250b57cec5SDimitry Andric 46260b57cec5SDimitry Andric /// Unpacks the high-order 64-bit elements from two 128-bit vectors of 46270b57cec5SDimitry Andric /// [2 x double] and interleaves them into a 128-bit vector of [2 x 46280b57cec5SDimitry Andric /// double]. 46290b57cec5SDimitry Andric /// 46300b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46310b57cec5SDimitry Andric /// 46320b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction. 46330b57cec5SDimitry Andric /// 46340b57cec5SDimitry Andric /// \param __a 46350b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 46360b57cec5SDimitry Andric /// Bits [127:64] are written to bits [63:0] of the destination. 46370b57cec5SDimitry Andric /// \param __b 46380b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 46390b57cec5SDimitry Andric /// Bits [127:64] are written to bits [127:64] of the destination. 46400b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 
464181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, 464281ad6265SDimitry Andric __m128d __b) { 46430b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2 + 1); 46440b57cec5SDimitry Andric } 46450b57cec5SDimitry Andric 46460b57cec5SDimitry Andric /// Unpacks the low-order 64-bit elements from two 128-bit vectors 46470b57cec5SDimitry Andric /// of [2 x double] and interleaves them into a 128-bit vector of [2 x 46480b57cec5SDimitry Andric /// double]. 46490b57cec5SDimitry Andric /// 46500b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46510b57cec5SDimitry Andric /// 46520b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction. 46530b57cec5SDimitry Andric /// 46540b57cec5SDimitry Andric /// \param __a 46550b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 46560b57cec5SDimitry Andric /// Bits [63:0] are written to bits [63:0] of the destination. 46570b57cec5SDimitry Andric /// \param __b 46580b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. \n 46590b57cec5SDimitry Andric /// Bits [63:0] are written to bits [127:64] of the destination. 46600b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 466181ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, 466281ad6265SDimitry Andric __m128d __b) { 46630b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2 + 0); 46640b57cec5SDimitry Andric } 46650b57cec5SDimitry Andric 46660b57cec5SDimitry Andric /// Extracts the sign bits of the double-precision values in the 128-bit 46670b57cec5SDimitry Andric /// vector of [2 x double], zero-extends the value, and writes it to the 46680b57cec5SDimitry Andric /// low-order bits of the destination. 
46690b57cec5SDimitry Andric /// 46700b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46710b57cec5SDimitry Andric /// 46720b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction. 46730b57cec5SDimitry Andric /// 46740b57cec5SDimitry Andric /// \param __a 46750b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the values with sign bits to 46760b57cec5SDimitry Andric /// be extracted. 46770b57cec5SDimitry Andric /// \returns The sign bits from each of the double-precision elements in \a __a, 46780b57cec5SDimitry Andric /// written to bits [1:0]. The remaining bits are assigned values of zero. 467981ad6265SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { 46800b57cec5SDimitry Andric return __builtin_ia32_movmskpd((__v2df)__a); 46810b57cec5SDimitry Andric } 46820b57cec5SDimitry Andric 46830b57cec5SDimitry Andric /// Constructs a 128-bit floating-point vector of [2 x double] from two 46840b57cec5SDimitry Andric /// 128-bit vector parameters of [2 x double], using the immediate-value 46850b57cec5SDimitry Andric /// parameter as a specifier. 46860b57cec5SDimitry Andric /// 46870b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 46880b57cec5SDimitry Andric /// 46890b57cec5SDimitry Andric /// \code 46900b57cec5SDimitry Andric /// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i); 46910b57cec5SDimitry Andric /// \endcode 46920b57cec5SDimitry Andric /// 46930b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction. 46940b57cec5SDimitry Andric /// 46950b57cec5SDimitry Andric /// \param a 46960b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 46970b57cec5SDimitry Andric /// \param b 46980b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 46990b57cec5SDimitry Andric /// \param i 47000b57cec5SDimitry Andric /// An 8-bit immediate value. 
The least significant two bits specify which 47010b57cec5SDimitry Andric /// elements to copy from \a a and \a b: \n 47020b57cec5SDimitry Andric /// Bit[0] = 0: lower element of \a a copied to lower element of result. \n 47030b57cec5SDimitry Andric /// Bit[0] = 1: upper element of \a a copied to lower element of result. \n 47040b57cec5SDimitry Andric /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n 47050b57cec5SDimitry Andric /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n 470681ad6265SDimitry Andric /// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro. 470781ad6265SDimitry Andric /// <c>_MM_SHUFFLE2(b1, b0)</c> can create a 2-bit mask of the form 470881ad6265SDimitry Andric /// <c>[b1, b0]</c>. 47090b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the shuffled values. 47100b57cec5SDimitry Andric #define _mm_shuffle_pd(a, b, i) \ 4711349cc55cSDimitry Andric ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ 4712349cc55cSDimitry Andric (int)(i))) 47130b57cec5SDimitry Andric 47140b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 47150b57cec5SDimitry Andric /// floating-point vector of [4 x float]. 47160b57cec5SDimitry Andric /// 47170b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47180b57cec5SDimitry Andric /// 47190b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47200b57cec5SDimitry Andric /// 47210b57cec5SDimitry Andric /// \param __a 47220b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 47230b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 47240b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
472581ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) { 47260b57cec5SDimitry Andric return (__m128)__a; 47270b57cec5SDimitry Andric } 47280b57cec5SDimitry Andric 47290b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit 47300b57cec5SDimitry Andric /// integer vector. 47310b57cec5SDimitry Andric /// 47320b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47330b57cec5SDimitry Andric /// 47340b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47350b57cec5SDimitry Andric /// 47360b57cec5SDimitry Andric /// \param __a 47370b57cec5SDimitry Andric /// A 128-bit floating-point vector of [2 x double]. 47380b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 47390b57cec5SDimitry Andric /// parameter. 474081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a) { 47410b57cec5SDimitry Andric return (__m128i)__a; 47420b57cec5SDimitry Andric } 47430b57cec5SDimitry Andric 47440b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 47450b57cec5SDimitry Andric /// floating-point vector of [2 x double]. 47460b57cec5SDimitry Andric /// 47470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47480b57cec5SDimitry Andric /// 47490b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47500b57cec5SDimitry Andric /// 47510b57cec5SDimitry Andric /// \param __a 47520b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 47530b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 47540b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
475581ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a) { 47560b57cec5SDimitry Andric return (__m128d)__a; 47570b57cec5SDimitry Andric } 47580b57cec5SDimitry Andric 47590b57cec5SDimitry Andric /// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit 47600b57cec5SDimitry Andric /// integer vector. 47610b57cec5SDimitry Andric /// 47620b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47630b57cec5SDimitry Andric /// 47640b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47650b57cec5SDimitry Andric /// 47660b57cec5SDimitry Andric /// \param __a 47670b57cec5SDimitry Andric /// A 128-bit floating-point vector of [4 x float]. 47680b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the same bitwise pattern as the 47690b57cec5SDimitry Andric /// parameter. 477081ad6265SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a) { 47710b57cec5SDimitry Andric return (__m128i)__a; 47720b57cec5SDimitry Andric } 47730b57cec5SDimitry Andric 47740b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 47750b57cec5SDimitry Andric /// of [4 x float]. 47760b57cec5SDimitry Andric /// 47770b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47780b57cec5SDimitry Andric /// 47790b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47800b57cec5SDimitry Andric /// 47810b57cec5SDimitry Andric /// \param __a 47820b57cec5SDimitry Andric /// A 128-bit integer vector. 47830b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [4 x float] containing the same 47840b57cec5SDimitry Andric /// bitwise pattern as the parameter. 
478581ad6265SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a) { 47860b57cec5SDimitry Andric return (__m128)__a; 47870b57cec5SDimitry Andric } 47880b57cec5SDimitry Andric 47890b57cec5SDimitry Andric /// Casts a 128-bit integer vector into a 128-bit floating-point vector 47900b57cec5SDimitry Andric /// of [2 x double]. 47910b57cec5SDimitry Andric /// 47920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 47930b57cec5SDimitry Andric /// 47940b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 47950b57cec5SDimitry Andric /// 47960b57cec5SDimitry Andric /// \param __a 47970b57cec5SDimitry Andric /// A 128-bit integer vector. 47980b57cec5SDimitry Andric /// \returns A 128-bit floating-point vector of [2 x double] containing the same 47990b57cec5SDimitry Andric /// bitwise pattern as the parameter. 480081ad6265SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { 48010b57cec5SDimitry Andric return (__m128d)__a; 48020b57cec5SDimitry Andric } 48030b57cec5SDimitry Andric 48040fca6ea1SDimitry Andric /// Compares each of the corresponding double-precision values of two 48050fca6ea1SDimitry Andric /// 128-bit vectors of [2 x double], using the operation specified by the 48060fca6ea1SDimitry Andric /// immediate integer operand. 48070fca6ea1SDimitry Andric /// 48080fca6ea1SDimitry Andric /// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. 48090fca6ea1SDimitry Andric /// If either value in a comparison is NaN, comparisons that are ordered 48100fca6ea1SDimitry Andric /// return false, and comparisons that are unordered return true. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> (V)CMPPD </c> instruction.
///
/// \param a
///    A 128-bit vector of [2 x double].
/// \param b
///    A 128-bit vector of [2 x double].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use: \n
///    0x00: Equal (ordered, non-signaling) \n
///    0x01: Less-than (ordered, signaling) \n
///    0x02: Less-than-or-equal (ordered, signaling) \n
///    0x03: Unordered (non-signaling) \n
///    0x04: Not-equal (unordered, non-signaling) \n
///    0x05: Not-less-than (unordered, signaling) \n
///    0x06: Not-less-than-or-equal (unordered, signaling) \n
///    0x07: Ordered (non-signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c)                                                    \
  ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b),   \
                                 (c)))

/// Compares each of the corresponding scalar double-precision values of
///    two 128-bit vectors of [2 x double], using the operation specified by the
///    immediate integer operand.
///
///    Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
///    If either value in a comparison is NaN, comparisons that are ordered
///    return false, and comparisons that are unordered return true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> (V)CMPSD </c> instruction.
///
/// \param a
///    A 128-bit vector of [2 x double].
/// \param b
///    A 128-bit vector of [2 x double].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use: \n
///    0x00: Equal (ordered, non-signaling) \n
///    0x01: Less-than (ordered, signaling) \n
///    0x02: Less-than-or-equal (ordered, signaling) \n
///    0x03: Unordered (non-signaling) \n
///    0x04: Not-equal (unordered, non-signaling) \n
///    0x05: Not-less-than (unordered, signaling) \n
///    0x06: Not-less-than-or-equal (unordered, signaling) \n
///    0x07: Ordered (non-signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c)                                                    \
  ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b),   \
                                 (c)))

#if defined(__cplusplus)
extern "C" {
#endif

/// Indicates that a spin loop is being executed for the purposes of
///    optimizing power consumption during the loop.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PAUSE </c> instruction.
48860b57cec5SDimitry Andric /// 48870b57cec5SDimitry Andric void _mm_pause(void); 48880b57cec5SDimitry Andric 48890b57cec5SDimitry Andric #if defined(__cplusplus) 48900b57cec5SDimitry Andric } // extern "C" 48910b57cec5SDimitry Andric #endif 48920b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 48930b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS_MMX 48940b57cec5SDimitry Andric 48950b57cec5SDimitry Andric #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) 48960b57cec5SDimitry Andric 48975ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_ON (0x0040U) 48985ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_OFF (0x0000U) 48990b57cec5SDimitry Andric 49005ffd83dbSDimitry Andric #define _MM_DENORMALS_ZERO_MASK (0x0040U) 49010b57cec5SDimitry Andric 49020b57cec5SDimitry Andric #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) 490381ad6265SDimitry Andric #define _MM_SET_DENORMALS_ZERO_MODE(x) \ 490481ad6265SDimitry Andric (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) 49050b57cec5SDimitry Andric 49060b57cec5SDimitry Andric #endif /* __EMMINTRIN_H */ 4907