1*f4a2713aSLionel Sambuc /*===---- xopintrin.h - XOP intrinsics -------------------------------------===
2*f4a2713aSLionel Sambuc *
3*f4a2713aSLionel Sambuc * Permission is hereby granted, free of charge, to any person obtaining a copy
4*f4a2713aSLionel Sambuc * of this software and associated documentation files (the "Software"), to deal
5*f4a2713aSLionel Sambuc * in the Software without restriction, including without limitation the rights
6*f4a2713aSLionel Sambuc * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7*f4a2713aSLionel Sambuc * copies of the Software, and to permit persons to whom the Software is
8*f4a2713aSLionel Sambuc * furnished to do so, subject to the following conditions:
9*f4a2713aSLionel Sambuc *
10*f4a2713aSLionel Sambuc * The above copyright notice and this permission notice shall be included in
11*f4a2713aSLionel Sambuc * all copies or substantial portions of the Software.
12*f4a2713aSLionel Sambuc *
13*f4a2713aSLionel Sambuc * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14*f4a2713aSLionel Sambuc * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15*f4a2713aSLionel Sambuc * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16*f4a2713aSLionel Sambuc * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17*f4a2713aSLionel Sambuc * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18*f4a2713aSLionel Sambuc * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19*f4a2713aSLionel Sambuc * THE SOFTWARE.
20*f4a2713aSLionel Sambuc *
21*f4a2713aSLionel Sambuc *===-----------------------------------------------------------------------===
22*f4a2713aSLionel Sambuc */
23*f4a2713aSLionel Sambuc
24*f4a2713aSLionel Sambuc #ifndef __X86INTRIN_H
25*f4a2713aSLionel Sambuc #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
26*f4a2713aSLionel Sambuc #endif
27*f4a2713aSLionel Sambuc
28*f4a2713aSLionel Sambuc #ifndef __XOPINTRIN_H
29*f4a2713aSLionel Sambuc #define __XOPINTRIN_H
30*f4a2713aSLionel Sambuc
31*f4a2713aSLionel Sambuc #ifndef __XOP__
32*f4a2713aSLionel Sambuc # error "XOP instruction set is not enabled"
33*f4a2713aSLionel Sambuc #else
34*f4a2713aSLionel Sambuc
35*f4a2713aSLionel Sambuc #include <fma4intrin.h>
36*f4a2713aSLionel Sambuc
37*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccs_epi16(__m128i __A,__m128i __B,__m128i __C)38*f4a2713aSLionel Sambuc _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
39*f4a2713aSLionel Sambuc {
40*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
41*f4a2713aSLionel Sambuc }
42*f4a2713aSLionel Sambuc
43*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_macc_epi16(__m128i __A,__m128i __B,__m128i __C)44*f4a2713aSLionel Sambuc _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
45*f4a2713aSLionel Sambuc {
46*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
47*f4a2713aSLionel Sambuc }
48*f4a2713aSLionel Sambuc
49*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccsd_epi16(__m128i __A,__m128i __B,__m128i __C)50*f4a2713aSLionel Sambuc _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
51*f4a2713aSLionel Sambuc {
52*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
53*f4a2713aSLionel Sambuc }
54*f4a2713aSLionel Sambuc
55*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccd_epi16(__m128i __A,__m128i __B,__m128i __C)56*f4a2713aSLionel Sambuc _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
57*f4a2713aSLionel Sambuc {
58*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
59*f4a2713aSLionel Sambuc }
60*f4a2713aSLionel Sambuc
61*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccs_epi32(__m128i __A,__m128i __B,__m128i __C)62*f4a2713aSLionel Sambuc _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
63*f4a2713aSLionel Sambuc {
64*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
65*f4a2713aSLionel Sambuc }
66*f4a2713aSLionel Sambuc
67*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_macc_epi32(__m128i __A,__m128i __B,__m128i __C)68*f4a2713aSLionel Sambuc _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
69*f4a2713aSLionel Sambuc {
70*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
71*f4a2713aSLionel Sambuc }
72*f4a2713aSLionel Sambuc
73*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccslo_epi32(__m128i __A,__m128i __B,__m128i __C)74*f4a2713aSLionel Sambuc _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
75*f4a2713aSLionel Sambuc {
76*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
77*f4a2713aSLionel Sambuc }
78*f4a2713aSLionel Sambuc
79*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_macclo_epi32(__m128i __A,__m128i __B,__m128i __C)80*f4a2713aSLionel Sambuc _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
81*f4a2713aSLionel Sambuc {
82*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
83*f4a2713aSLionel Sambuc }
84*f4a2713aSLionel Sambuc
85*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccshi_epi32(__m128i __A,__m128i __B,__m128i __C)86*f4a2713aSLionel Sambuc _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
87*f4a2713aSLionel Sambuc {
88*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
89*f4a2713aSLionel Sambuc }
90*f4a2713aSLionel Sambuc
91*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_macchi_epi32(__m128i __A,__m128i __B,__m128i __C)92*f4a2713aSLionel Sambuc _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
93*f4a2713aSLionel Sambuc {
94*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
95*f4a2713aSLionel Sambuc }
96*f4a2713aSLionel Sambuc
97*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maddsd_epi16(__m128i __A,__m128i __B,__m128i __C)98*f4a2713aSLionel Sambuc _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
99*f4a2713aSLionel Sambuc {
100*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
101*f4a2713aSLionel Sambuc }
102*f4a2713aSLionel Sambuc
103*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maddd_epi16(__m128i __A,__m128i __B,__m128i __C)104*f4a2713aSLionel Sambuc _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
105*f4a2713aSLionel Sambuc {
106*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
107*f4a2713aSLionel Sambuc }
108*f4a2713aSLionel Sambuc
109*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddw_epi8(__m128i __A)110*f4a2713aSLionel Sambuc _mm_haddw_epi8(__m128i __A)
111*f4a2713aSLionel Sambuc {
112*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
113*f4a2713aSLionel Sambuc }
114*f4a2713aSLionel Sambuc
115*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddd_epi8(__m128i __A)116*f4a2713aSLionel Sambuc _mm_haddd_epi8(__m128i __A)
117*f4a2713aSLionel Sambuc {
118*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
119*f4a2713aSLionel Sambuc }
120*f4a2713aSLionel Sambuc
121*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epi8(__m128i __A)122*f4a2713aSLionel Sambuc _mm_haddq_epi8(__m128i __A)
123*f4a2713aSLionel Sambuc {
124*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
125*f4a2713aSLionel Sambuc }
126*f4a2713aSLionel Sambuc
127*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddd_epi16(__m128i __A)128*f4a2713aSLionel Sambuc _mm_haddd_epi16(__m128i __A)
129*f4a2713aSLionel Sambuc {
130*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
131*f4a2713aSLionel Sambuc }
132*f4a2713aSLionel Sambuc
133*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epi16(__m128i __A)134*f4a2713aSLionel Sambuc _mm_haddq_epi16(__m128i __A)
135*f4a2713aSLionel Sambuc {
136*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
137*f4a2713aSLionel Sambuc }
138*f4a2713aSLionel Sambuc
139*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epi32(__m128i __A)140*f4a2713aSLionel Sambuc _mm_haddq_epi32(__m128i __A)
141*f4a2713aSLionel Sambuc {
142*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
143*f4a2713aSLionel Sambuc }
144*f4a2713aSLionel Sambuc
145*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddw_epu8(__m128i __A)146*f4a2713aSLionel Sambuc _mm_haddw_epu8(__m128i __A)
147*f4a2713aSLionel Sambuc {
148*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
149*f4a2713aSLionel Sambuc }
150*f4a2713aSLionel Sambuc
151*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddd_epu8(__m128i __A)152*f4a2713aSLionel Sambuc _mm_haddd_epu8(__m128i __A)
153*f4a2713aSLionel Sambuc {
154*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
155*f4a2713aSLionel Sambuc }
156*f4a2713aSLionel Sambuc
157*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epu8(__m128i __A)158*f4a2713aSLionel Sambuc _mm_haddq_epu8(__m128i __A)
159*f4a2713aSLionel Sambuc {
160*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
161*f4a2713aSLionel Sambuc }
162*f4a2713aSLionel Sambuc
163*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddd_epu16(__m128i __A)164*f4a2713aSLionel Sambuc _mm_haddd_epu16(__m128i __A)
165*f4a2713aSLionel Sambuc {
166*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
167*f4a2713aSLionel Sambuc }
168*f4a2713aSLionel Sambuc
169*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epu16(__m128i __A)170*f4a2713aSLionel Sambuc _mm_haddq_epu16(__m128i __A)
171*f4a2713aSLionel Sambuc {
172*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
173*f4a2713aSLionel Sambuc }
174*f4a2713aSLionel Sambuc
175*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epu32(__m128i __A)176*f4a2713aSLionel Sambuc _mm_haddq_epu32(__m128i __A)
177*f4a2713aSLionel Sambuc {
178*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
179*f4a2713aSLionel Sambuc }
180*f4a2713aSLionel Sambuc
181*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubw_epi8(__m128i __A)182*f4a2713aSLionel Sambuc _mm_hsubw_epi8(__m128i __A)
183*f4a2713aSLionel Sambuc {
184*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
185*f4a2713aSLionel Sambuc }
186*f4a2713aSLionel Sambuc
187*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubd_epi16(__m128i __A)188*f4a2713aSLionel Sambuc _mm_hsubd_epi16(__m128i __A)
189*f4a2713aSLionel Sambuc {
190*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
191*f4a2713aSLionel Sambuc }
192*f4a2713aSLionel Sambuc
193*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubq_epi32(__m128i __A)194*f4a2713aSLionel Sambuc _mm_hsubq_epi32(__m128i __A)
195*f4a2713aSLionel Sambuc {
196*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
197*f4a2713aSLionel Sambuc }
198*f4a2713aSLionel Sambuc
199*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmov_si128(__m128i __A,__m128i __B,__m128i __C)200*f4a2713aSLionel Sambuc _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
201*f4a2713aSLionel Sambuc {
202*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
203*f4a2713aSLionel Sambuc }
204*f4a2713aSLionel Sambuc
205*f4a2713aSLionel Sambuc static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmov_si256(__m256i __A,__m256i __B,__m256i __C)206*f4a2713aSLionel Sambuc _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
207*f4a2713aSLionel Sambuc {
208*f4a2713aSLionel Sambuc return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
209*f4a2713aSLionel Sambuc }
210*f4a2713aSLionel Sambuc
211*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_perm_epi8(__m128i __A,__m128i __B,__m128i __C)212*f4a2713aSLionel Sambuc _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
213*f4a2713aSLionel Sambuc {
214*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
215*f4a2713aSLionel Sambuc }
216*f4a2713aSLionel Sambuc
217*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_rot_epi8(__m128i __A,__m128i __B)218*f4a2713aSLionel Sambuc _mm_rot_epi8(__m128i __A, __m128i __B)
219*f4a2713aSLionel Sambuc {
220*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
221*f4a2713aSLionel Sambuc }
222*f4a2713aSLionel Sambuc
223*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_rot_epi16(__m128i __A,__m128i __B)224*f4a2713aSLionel Sambuc _mm_rot_epi16(__m128i __A, __m128i __B)
225*f4a2713aSLionel Sambuc {
226*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
227*f4a2713aSLionel Sambuc }
228*f4a2713aSLionel Sambuc
229*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_rot_epi32(__m128i __A,__m128i __B)230*f4a2713aSLionel Sambuc _mm_rot_epi32(__m128i __A, __m128i __B)
231*f4a2713aSLionel Sambuc {
232*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
233*f4a2713aSLionel Sambuc }
234*f4a2713aSLionel Sambuc
235*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_rot_epi64(__m128i __A,__m128i __B)236*f4a2713aSLionel Sambuc _mm_rot_epi64(__m128i __A, __m128i __B)
237*f4a2713aSLionel Sambuc {
238*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
239*f4a2713aSLionel Sambuc }
240*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vprotbi applied to A (viewed as __v16qi) with N
   forwarded unchanged; the result is an __m128i. N is presumably required to
   be an integer constant (immediate operand) -- confirm against the builtin.
   A is cast in place rather than copied into a block-scope temporary: a
   temporary named __A would be initialized from itself whenever the caller's
   argument expression also mentions __A. */
#define _mm_roti_epi8(A, N) \
  ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))
244*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vprotwi applied to A (viewed as __v8hi) with N
   forwarded unchanged; the result is an __m128i. N is presumably required to
   be an integer constant (immediate operand) -- confirm against the builtin.
   A is cast in place rather than copied into a block-scope temporary: a
   temporary named __A would be initialized from itself whenever the caller's
   argument expression also mentions __A. */
#define _mm_roti_epi16(A, N) \
  ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))
248*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vprotdi applied to A (viewed as __v4si) with N
   forwarded unchanged; the result is an __m128i. N is presumably required to
   be an integer constant (immediate operand) -- confirm against the builtin.
   A is cast in place rather than copied into a block-scope temporary: a
   temporary named __A would be initialized from itself whenever the caller's
   argument expression also mentions __A. */
#define _mm_roti_epi32(A, N) \
  ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))
252*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vprotqi applied to A (viewed as __v2di) with N
   forwarded unchanged; the result is an __m128i. N is presumably required to
   be an integer constant (immediate operand) -- confirm against the builtin.
   A is cast in place rather than copied into a block-scope temporary: a
   temporary named __A would be initialized from itself whenever the caller's
   argument expression also mentions __A. */
#define _mm_roti_epi64(A, N) \
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))
256*f4a2713aSLionel Sambuc
257*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shl_epi8(__m128i __A,__m128i __B)258*f4a2713aSLionel Sambuc _mm_shl_epi8(__m128i __A, __m128i __B)
259*f4a2713aSLionel Sambuc {
260*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
261*f4a2713aSLionel Sambuc }
262*f4a2713aSLionel Sambuc
263*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shl_epi16(__m128i __A,__m128i __B)264*f4a2713aSLionel Sambuc _mm_shl_epi16(__m128i __A, __m128i __B)
265*f4a2713aSLionel Sambuc {
266*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
267*f4a2713aSLionel Sambuc }
268*f4a2713aSLionel Sambuc
269*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shl_epi32(__m128i __A,__m128i __B)270*f4a2713aSLionel Sambuc _mm_shl_epi32(__m128i __A, __m128i __B)
271*f4a2713aSLionel Sambuc {
272*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
273*f4a2713aSLionel Sambuc }
274*f4a2713aSLionel Sambuc
275*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shl_epi64(__m128i __A,__m128i __B)276*f4a2713aSLionel Sambuc _mm_shl_epi64(__m128i __A, __m128i __B)
277*f4a2713aSLionel Sambuc {
278*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
279*f4a2713aSLionel Sambuc }
280*f4a2713aSLionel Sambuc
281*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha_epi8(__m128i __A,__m128i __B)282*f4a2713aSLionel Sambuc _mm_sha_epi8(__m128i __A, __m128i __B)
283*f4a2713aSLionel Sambuc {
284*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
285*f4a2713aSLionel Sambuc }
286*f4a2713aSLionel Sambuc
287*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha_epi16(__m128i __A,__m128i __B)288*f4a2713aSLionel Sambuc _mm_sha_epi16(__m128i __A, __m128i __B)
289*f4a2713aSLionel Sambuc {
290*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
291*f4a2713aSLionel Sambuc }
292*f4a2713aSLionel Sambuc
293*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha_epi32(__m128i __A,__m128i __B)294*f4a2713aSLionel Sambuc _mm_sha_epi32(__m128i __A, __m128i __B)
295*f4a2713aSLionel Sambuc {
296*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
297*f4a2713aSLionel Sambuc }
298*f4a2713aSLionel Sambuc
299*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha_epi64(__m128i __A,__m128i __B)300*f4a2713aSLionel Sambuc _mm_sha_epi64(__m128i __A, __m128i __B)
301*f4a2713aSLionel Sambuc {
302*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
303*f4a2713aSLionel Sambuc }
304*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vpcomub applied to A and B (each viewed as
   __v16qi) with N forwarded as the predicate selector; the result is an
   __m128i. N is presumably required to be an integer constant (immediate
   operand) -- confirm against the builtin.
   The operands are cast in place rather than copied into block-scope
   temporaries: temporaries named __A/__B would be initialized from themselves
   whenever the caller's arguments are themselves named __A/__B. */
#define _mm_com_epu8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), (N)))
309*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vpcomuw applied to A and B (each viewed as
   __v8hi) with N forwarded as the predicate selector; the result is an
   __m128i. N is presumably required to be an integer constant (immediate
   operand) -- confirm against the builtin.
   The operands are cast in place rather than copied into block-scope
   temporaries: temporaries named __A/__B would be initialized from themselves
   whenever the caller's arguments are themselves named __A/__B. */
#define _mm_com_epu16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), (N)))
314*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vpcomud applied to A and B (each viewed as
   __v4si) with N forwarded as the predicate selector; the result is an
   __m128i. N is presumably required to be an integer constant (immediate
   operand) -- confirm against the builtin.
   The operands are cast in place rather than copied into block-scope
   temporaries: temporaries named __A/__B would be initialized from themselves
   whenever the caller's arguments are themselves named __A/__B. */
#define _mm_com_epu32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), (N)))
319*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vpcomuq applied to A and B (each viewed as
   __v2di) with N forwarded as the predicate selector; the result is an
   __m128i. N is presumably required to be an integer constant (immediate
   operand) -- confirm against the builtin.
   The operands are cast in place rather than copied into block-scope
   temporaries: temporaries named __A/__B would be initialized from themselves
   whenever the caller's arguments are themselves named __A/__B. */
#define _mm_com_epu64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), (N)))
324*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vpcomb applied to A and B (each viewed as
   __v16qi) with N forwarded as the predicate selector; the result is an
   __m128i. N is presumably required to be an integer constant (immediate
   operand) -- confirm against the builtin.
   The operands are cast in place rather than copied into block-scope
   temporaries: temporaries named __A/__B would be initialized from themselves
   whenever the caller's arguments are themselves named __A/__B. */
#define _mm_com_epi8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)))
329*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vpcomw applied to A and B (each viewed as
   __v8hi) with N forwarded as the predicate selector; the result is an
   __m128i. N is presumably required to be an integer constant (immediate
   operand) -- confirm against the builtin.
   The operands are cast in place rather than copied into block-scope
   temporaries: temporaries named __A/__B would be initialized from themselves
   whenever the caller's arguments are themselves named __A/__B. */
#define _mm_com_epi16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)))
334*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vpcomd applied to A and B (each viewed as
   __v4si) with N forwarded as the predicate selector; the result is an
   __m128i. N is presumably required to be an integer constant (immediate
   operand) -- confirm against the builtin.
   The operands are cast in place rather than copied into block-scope
   temporaries: temporaries named __A/__B would be initialized from themselves
   whenever the caller's arguments are themselves named __A/__B. */
#define _mm_com_epi32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)))
339*f4a2713aSLionel Sambuc
/* Expands to __builtin_ia32_vpcomq applied to A and B (each viewed as
   __v2di) with N forwarded as the predicate selector; the result is an
   __m128i. N is presumably required to be an integer constant (immediate
   operand) -- confirm against the builtin.
   The operands are cast in place rather than copied into block-scope
   temporaries: temporaries named __A/__B would be initialized from themselves
   whenever the caller's arguments are themselves named __A/__B. */
#define _mm_com_epi64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)))
344*f4a2713aSLionel Sambuc
/* Predicate selectors for the third (N) argument of the _mm_com_ep* macros.
   Each name spells out the comparison it selects; the numeric encoding is
   presumably the immediate expected by the VPCOM* instructions -- confirm
   in the AMD manual. */
#define _MM_PCOMCTRL_LT    0
#define _MM_PCOMCTRL_LE    1
#define _MM_PCOMCTRL_GT    2
#define _MM_PCOMCTRL_GE    3
#define _MM_PCOMCTRL_EQ    4
#define _MM_PCOMCTRL_NEQ   5
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE  7
353*f4a2713aSLionel Sambuc
354*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epu8(__m128i __A,__m128i __B)355*f4a2713aSLionel Sambuc _mm_comlt_epu8(__m128i __A, __m128i __B)
356*f4a2713aSLionel Sambuc {
357*f4a2713aSLionel Sambuc return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
358*f4a2713aSLionel Sambuc }
359*f4a2713aSLionel Sambuc
360*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epu8(__m128i __A,__m128i __B)361*f4a2713aSLionel Sambuc _mm_comle_epu8(__m128i __A, __m128i __B)
362*f4a2713aSLionel Sambuc {
363*f4a2713aSLionel Sambuc return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
364*f4a2713aSLionel Sambuc }
365*f4a2713aSLionel Sambuc
366*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epu8(__m128i __A,__m128i __B)367*f4a2713aSLionel Sambuc _mm_comgt_epu8(__m128i __A, __m128i __B)
368*f4a2713aSLionel Sambuc {
369*f4a2713aSLionel Sambuc return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
370*f4a2713aSLionel Sambuc }
371*f4a2713aSLionel Sambuc
372*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epu8(__m128i __A,__m128i __B)373*f4a2713aSLionel Sambuc _mm_comge_epu8(__m128i __A, __m128i __B)
374*f4a2713aSLionel Sambuc {
375*f4a2713aSLionel Sambuc return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
376*f4a2713aSLionel Sambuc }
377*f4a2713aSLionel Sambuc
378*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epu8(__m128i __A,__m128i __B)379*f4a2713aSLionel Sambuc _mm_comeq_epu8(__m128i __A, __m128i __B)
380*f4a2713aSLionel Sambuc {
381*f4a2713aSLionel Sambuc return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
382*f4a2713aSLionel Sambuc }
383*f4a2713aSLionel Sambuc
384*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epu8(__m128i __A,__m128i __B)385*f4a2713aSLionel Sambuc _mm_comneq_epu8(__m128i __A, __m128i __B)
386*f4a2713aSLionel Sambuc {
387*f4a2713aSLionel Sambuc return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
388*f4a2713aSLionel Sambuc }
389*f4a2713aSLionel Sambuc
390*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epu8(__m128i __A,__m128i __B)391*f4a2713aSLionel Sambuc _mm_comfalse_epu8(__m128i __A, __m128i __B)
392*f4a2713aSLionel Sambuc {
393*f4a2713aSLionel Sambuc return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
394*f4a2713aSLionel Sambuc }
395*f4a2713aSLionel Sambuc
396*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epu8(__m128i __A,__m128i __B)397*f4a2713aSLionel Sambuc _mm_comtrue_epu8(__m128i __A, __m128i __B)
398*f4a2713aSLionel Sambuc {
399*f4a2713aSLionel Sambuc return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
400*f4a2713aSLionel Sambuc }
401*f4a2713aSLionel Sambuc
402*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epu16(__m128i __A,__m128i __B)403*f4a2713aSLionel Sambuc _mm_comlt_epu16(__m128i __A, __m128i __B)
404*f4a2713aSLionel Sambuc {
405*f4a2713aSLionel Sambuc return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
406*f4a2713aSLionel Sambuc }
407*f4a2713aSLionel Sambuc
408*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epu16(__m128i __A,__m128i __B)409*f4a2713aSLionel Sambuc _mm_comle_epu16(__m128i __A, __m128i __B)
410*f4a2713aSLionel Sambuc {
411*f4a2713aSLionel Sambuc return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
412*f4a2713aSLionel Sambuc }
413*f4a2713aSLionel Sambuc
414*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epu16(__m128i __A,__m128i __B)415*f4a2713aSLionel Sambuc _mm_comgt_epu16(__m128i __A, __m128i __B)
416*f4a2713aSLionel Sambuc {
417*f4a2713aSLionel Sambuc return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
418*f4a2713aSLionel Sambuc }
419*f4a2713aSLionel Sambuc
420*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epu16(__m128i __A,__m128i __B)421*f4a2713aSLionel Sambuc _mm_comge_epu16(__m128i __A, __m128i __B)
422*f4a2713aSLionel Sambuc {
423*f4a2713aSLionel Sambuc return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
424*f4a2713aSLionel Sambuc }
425*f4a2713aSLionel Sambuc
426*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epu16(__m128i __A,__m128i __B)427*f4a2713aSLionel Sambuc _mm_comeq_epu16(__m128i __A, __m128i __B)
428*f4a2713aSLionel Sambuc {
429*f4a2713aSLionel Sambuc return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
430*f4a2713aSLionel Sambuc }
431*f4a2713aSLionel Sambuc
432*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epu16(__m128i __A,__m128i __B)433*f4a2713aSLionel Sambuc _mm_comneq_epu16(__m128i __A, __m128i __B)
434*f4a2713aSLionel Sambuc {
435*f4a2713aSLionel Sambuc return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
436*f4a2713aSLionel Sambuc }
437*f4a2713aSLionel Sambuc
438*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epu16(__m128i __A,__m128i __B)439*f4a2713aSLionel Sambuc _mm_comfalse_epu16(__m128i __A, __m128i __B)
440*f4a2713aSLionel Sambuc {
441*f4a2713aSLionel Sambuc return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
442*f4a2713aSLionel Sambuc }
443*f4a2713aSLionel Sambuc
444*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epu16(__m128i __A,__m128i __B)445*f4a2713aSLionel Sambuc _mm_comtrue_epu16(__m128i __A, __m128i __B)
446*f4a2713aSLionel Sambuc {
447*f4a2713aSLionel Sambuc return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
448*f4a2713aSLionel Sambuc }
449*f4a2713aSLionel Sambuc
450*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epu32(__m128i __A,__m128i __B)451*f4a2713aSLionel Sambuc _mm_comlt_epu32(__m128i __A, __m128i __B)
452*f4a2713aSLionel Sambuc {
453*f4a2713aSLionel Sambuc return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
454*f4a2713aSLionel Sambuc }
455*f4a2713aSLionel Sambuc
456*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epu32(__m128i __A,__m128i __B)457*f4a2713aSLionel Sambuc _mm_comle_epu32(__m128i __A, __m128i __B)
458*f4a2713aSLionel Sambuc {
459*f4a2713aSLionel Sambuc return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
460*f4a2713aSLionel Sambuc }
461*f4a2713aSLionel Sambuc
462*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epu32(__m128i __A,__m128i __B)463*f4a2713aSLionel Sambuc _mm_comgt_epu32(__m128i __A, __m128i __B)
464*f4a2713aSLionel Sambuc {
465*f4a2713aSLionel Sambuc return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
466*f4a2713aSLionel Sambuc }
467*f4a2713aSLionel Sambuc
468*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epu32(__m128i __A,__m128i __B)469*f4a2713aSLionel Sambuc _mm_comge_epu32(__m128i __A, __m128i __B)
470*f4a2713aSLionel Sambuc {
471*f4a2713aSLionel Sambuc return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
472*f4a2713aSLionel Sambuc }
473*f4a2713aSLionel Sambuc
474*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epu32(__m128i __A,__m128i __B)475*f4a2713aSLionel Sambuc _mm_comeq_epu32(__m128i __A, __m128i __B)
476*f4a2713aSLionel Sambuc {
477*f4a2713aSLionel Sambuc return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
478*f4a2713aSLionel Sambuc }
479*f4a2713aSLionel Sambuc
480*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epu32(__m128i __A,__m128i __B)481*f4a2713aSLionel Sambuc _mm_comneq_epu32(__m128i __A, __m128i __B)
482*f4a2713aSLionel Sambuc {
483*f4a2713aSLionel Sambuc return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
484*f4a2713aSLionel Sambuc }
485*f4a2713aSLionel Sambuc
486*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epu32(__m128i __A,__m128i __B)487*f4a2713aSLionel Sambuc _mm_comfalse_epu32(__m128i __A, __m128i __B)
488*f4a2713aSLionel Sambuc {
489*f4a2713aSLionel Sambuc return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
490*f4a2713aSLionel Sambuc }
491*f4a2713aSLionel Sambuc
492*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epu32(__m128i __A,__m128i __B)493*f4a2713aSLionel Sambuc _mm_comtrue_epu32(__m128i __A, __m128i __B)
494*f4a2713aSLionel Sambuc {
495*f4a2713aSLionel Sambuc return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
496*f4a2713aSLionel Sambuc }
497*f4a2713aSLionel Sambuc
498*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epu64(__m128i __A,__m128i __B)499*f4a2713aSLionel Sambuc _mm_comlt_epu64(__m128i __A, __m128i __B)
500*f4a2713aSLionel Sambuc {
501*f4a2713aSLionel Sambuc return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
502*f4a2713aSLionel Sambuc }
503*f4a2713aSLionel Sambuc
504*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epu64(__m128i __A,__m128i __B)505*f4a2713aSLionel Sambuc _mm_comle_epu64(__m128i __A, __m128i __B)
506*f4a2713aSLionel Sambuc {
507*f4a2713aSLionel Sambuc return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
508*f4a2713aSLionel Sambuc }
509*f4a2713aSLionel Sambuc
510*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epu64(__m128i __A,__m128i __B)511*f4a2713aSLionel Sambuc _mm_comgt_epu64(__m128i __A, __m128i __B)
512*f4a2713aSLionel Sambuc {
513*f4a2713aSLionel Sambuc return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
514*f4a2713aSLionel Sambuc }
515*f4a2713aSLionel Sambuc
516*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epu64(__m128i __A,__m128i __B)517*f4a2713aSLionel Sambuc _mm_comge_epu64(__m128i __A, __m128i __B)
518*f4a2713aSLionel Sambuc {
519*f4a2713aSLionel Sambuc return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
520*f4a2713aSLionel Sambuc }
521*f4a2713aSLionel Sambuc
522*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epu64(__m128i __A,__m128i __B)523*f4a2713aSLionel Sambuc _mm_comeq_epu64(__m128i __A, __m128i __B)
524*f4a2713aSLionel Sambuc {
525*f4a2713aSLionel Sambuc return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
526*f4a2713aSLionel Sambuc }
527*f4a2713aSLionel Sambuc
528*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epu64(__m128i __A,__m128i __B)529*f4a2713aSLionel Sambuc _mm_comneq_epu64(__m128i __A, __m128i __B)
530*f4a2713aSLionel Sambuc {
531*f4a2713aSLionel Sambuc return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
532*f4a2713aSLionel Sambuc }
533*f4a2713aSLionel Sambuc
534*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epu64(__m128i __A,__m128i __B)535*f4a2713aSLionel Sambuc _mm_comfalse_epu64(__m128i __A, __m128i __B)
536*f4a2713aSLionel Sambuc {
537*f4a2713aSLionel Sambuc return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
538*f4a2713aSLionel Sambuc }
539*f4a2713aSLionel Sambuc
540*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epu64(__m128i __A,__m128i __B)541*f4a2713aSLionel Sambuc _mm_comtrue_epu64(__m128i __A, __m128i __B)
542*f4a2713aSLionel Sambuc {
543*f4a2713aSLionel Sambuc return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
544*f4a2713aSLionel Sambuc }
545*f4a2713aSLionel Sambuc
546*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epi8(__m128i __A,__m128i __B)547*f4a2713aSLionel Sambuc _mm_comlt_epi8(__m128i __A, __m128i __B)
548*f4a2713aSLionel Sambuc {
549*f4a2713aSLionel Sambuc return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
550*f4a2713aSLionel Sambuc }
551*f4a2713aSLionel Sambuc
552*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epi8(__m128i __A,__m128i __B)553*f4a2713aSLionel Sambuc _mm_comle_epi8(__m128i __A, __m128i __B)
554*f4a2713aSLionel Sambuc {
555*f4a2713aSLionel Sambuc return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
556*f4a2713aSLionel Sambuc }
557*f4a2713aSLionel Sambuc
558*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epi8(__m128i __A,__m128i __B)559*f4a2713aSLionel Sambuc _mm_comgt_epi8(__m128i __A, __m128i __B)
560*f4a2713aSLionel Sambuc {
561*f4a2713aSLionel Sambuc return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
562*f4a2713aSLionel Sambuc }
563*f4a2713aSLionel Sambuc
564*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epi8(__m128i __A,__m128i __B)565*f4a2713aSLionel Sambuc _mm_comge_epi8(__m128i __A, __m128i __B)
566*f4a2713aSLionel Sambuc {
567*f4a2713aSLionel Sambuc return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
568*f4a2713aSLionel Sambuc }
569*f4a2713aSLionel Sambuc
570*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epi8(__m128i __A,__m128i __B)571*f4a2713aSLionel Sambuc _mm_comeq_epi8(__m128i __A, __m128i __B)
572*f4a2713aSLionel Sambuc {
573*f4a2713aSLionel Sambuc return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
574*f4a2713aSLionel Sambuc }
575*f4a2713aSLionel Sambuc
576*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epi8(__m128i __A,__m128i __B)577*f4a2713aSLionel Sambuc _mm_comneq_epi8(__m128i __A, __m128i __B)
578*f4a2713aSLionel Sambuc {
579*f4a2713aSLionel Sambuc return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
580*f4a2713aSLionel Sambuc }
581*f4a2713aSLionel Sambuc
582*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epi8(__m128i __A,__m128i __B)583*f4a2713aSLionel Sambuc _mm_comfalse_epi8(__m128i __A, __m128i __B)
584*f4a2713aSLionel Sambuc {
585*f4a2713aSLionel Sambuc return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
586*f4a2713aSLionel Sambuc }
587*f4a2713aSLionel Sambuc
588*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epi8(__m128i __A,__m128i __B)589*f4a2713aSLionel Sambuc _mm_comtrue_epi8(__m128i __A, __m128i __B)
590*f4a2713aSLionel Sambuc {
591*f4a2713aSLionel Sambuc return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
592*f4a2713aSLionel Sambuc }
593*f4a2713aSLionel Sambuc
594*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epi16(__m128i __A,__m128i __B)595*f4a2713aSLionel Sambuc _mm_comlt_epi16(__m128i __A, __m128i __B)
596*f4a2713aSLionel Sambuc {
597*f4a2713aSLionel Sambuc return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
598*f4a2713aSLionel Sambuc }
599*f4a2713aSLionel Sambuc
600*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epi16(__m128i __A,__m128i __B)601*f4a2713aSLionel Sambuc _mm_comle_epi16(__m128i __A, __m128i __B)
602*f4a2713aSLionel Sambuc {
603*f4a2713aSLionel Sambuc return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
604*f4a2713aSLionel Sambuc }
605*f4a2713aSLionel Sambuc
606*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epi16(__m128i __A,__m128i __B)607*f4a2713aSLionel Sambuc _mm_comgt_epi16(__m128i __A, __m128i __B)
608*f4a2713aSLionel Sambuc {
609*f4a2713aSLionel Sambuc return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
610*f4a2713aSLionel Sambuc }
611*f4a2713aSLionel Sambuc
612*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epi16(__m128i __A,__m128i __B)613*f4a2713aSLionel Sambuc _mm_comge_epi16(__m128i __A, __m128i __B)
614*f4a2713aSLionel Sambuc {
615*f4a2713aSLionel Sambuc return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
616*f4a2713aSLionel Sambuc }
617*f4a2713aSLionel Sambuc
618*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epi16(__m128i __A,__m128i __B)619*f4a2713aSLionel Sambuc _mm_comeq_epi16(__m128i __A, __m128i __B)
620*f4a2713aSLionel Sambuc {
621*f4a2713aSLionel Sambuc return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
622*f4a2713aSLionel Sambuc }
623*f4a2713aSLionel Sambuc
624*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epi16(__m128i __A,__m128i __B)625*f4a2713aSLionel Sambuc _mm_comneq_epi16(__m128i __A, __m128i __B)
626*f4a2713aSLionel Sambuc {
627*f4a2713aSLionel Sambuc return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
628*f4a2713aSLionel Sambuc }
629*f4a2713aSLionel Sambuc
630*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epi16(__m128i __A,__m128i __B)631*f4a2713aSLionel Sambuc _mm_comfalse_epi16(__m128i __A, __m128i __B)
632*f4a2713aSLionel Sambuc {
633*f4a2713aSLionel Sambuc return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
634*f4a2713aSLionel Sambuc }
635*f4a2713aSLionel Sambuc
636*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epi16(__m128i __A,__m128i __B)637*f4a2713aSLionel Sambuc _mm_comtrue_epi16(__m128i __A, __m128i __B)
638*f4a2713aSLionel Sambuc {
639*f4a2713aSLionel Sambuc return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
640*f4a2713aSLionel Sambuc }
641*f4a2713aSLionel Sambuc
642*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epi32(__m128i __A,__m128i __B)643*f4a2713aSLionel Sambuc _mm_comlt_epi32(__m128i __A, __m128i __B)
644*f4a2713aSLionel Sambuc {
645*f4a2713aSLionel Sambuc return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
646*f4a2713aSLionel Sambuc }
647*f4a2713aSLionel Sambuc
648*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epi32(__m128i __A,__m128i __B)649*f4a2713aSLionel Sambuc _mm_comle_epi32(__m128i __A, __m128i __B)
650*f4a2713aSLionel Sambuc {
651*f4a2713aSLionel Sambuc return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
652*f4a2713aSLionel Sambuc }
653*f4a2713aSLionel Sambuc
654*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epi32(__m128i __A,__m128i __B)655*f4a2713aSLionel Sambuc _mm_comgt_epi32(__m128i __A, __m128i __B)
656*f4a2713aSLionel Sambuc {
657*f4a2713aSLionel Sambuc return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
658*f4a2713aSLionel Sambuc }
659*f4a2713aSLionel Sambuc
660*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epi32(__m128i __A,__m128i __B)661*f4a2713aSLionel Sambuc _mm_comge_epi32(__m128i __A, __m128i __B)
662*f4a2713aSLionel Sambuc {
663*f4a2713aSLionel Sambuc return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
664*f4a2713aSLionel Sambuc }
665*f4a2713aSLionel Sambuc
666*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epi32(__m128i __A,__m128i __B)667*f4a2713aSLionel Sambuc _mm_comeq_epi32(__m128i __A, __m128i __B)
668*f4a2713aSLionel Sambuc {
669*f4a2713aSLionel Sambuc return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
670*f4a2713aSLionel Sambuc }
671*f4a2713aSLionel Sambuc
672*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epi32(__m128i __A,__m128i __B)673*f4a2713aSLionel Sambuc _mm_comneq_epi32(__m128i __A, __m128i __B)
674*f4a2713aSLionel Sambuc {
675*f4a2713aSLionel Sambuc return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
676*f4a2713aSLionel Sambuc }
677*f4a2713aSLionel Sambuc
678*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epi32(__m128i __A,__m128i __B)679*f4a2713aSLionel Sambuc _mm_comfalse_epi32(__m128i __A, __m128i __B)
680*f4a2713aSLionel Sambuc {
681*f4a2713aSLionel Sambuc return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
682*f4a2713aSLionel Sambuc }
683*f4a2713aSLionel Sambuc
684*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epi32(__m128i __A,__m128i __B)685*f4a2713aSLionel Sambuc _mm_comtrue_epi32(__m128i __A, __m128i __B)
686*f4a2713aSLionel Sambuc {
687*f4a2713aSLionel Sambuc return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
688*f4a2713aSLionel Sambuc }
689*f4a2713aSLionel Sambuc
690*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epi64(__m128i __A,__m128i __B)691*f4a2713aSLionel Sambuc _mm_comlt_epi64(__m128i __A, __m128i __B)
692*f4a2713aSLionel Sambuc {
693*f4a2713aSLionel Sambuc return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
694*f4a2713aSLionel Sambuc }
695*f4a2713aSLionel Sambuc
696*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epi64(__m128i __A,__m128i __B)697*f4a2713aSLionel Sambuc _mm_comle_epi64(__m128i __A, __m128i __B)
698*f4a2713aSLionel Sambuc {
699*f4a2713aSLionel Sambuc return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
700*f4a2713aSLionel Sambuc }
701*f4a2713aSLionel Sambuc
702*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epi64(__m128i __A,__m128i __B)703*f4a2713aSLionel Sambuc _mm_comgt_epi64(__m128i __A, __m128i __B)
704*f4a2713aSLionel Sambuc {
705*f4a2713aSLionel Sambuc return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
706*f4a2713aSLionel Sambuc }
707*f4a2713aSLionel Sambuc
708*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epi64(__m128i __A,__m128i __B)709*f4a2713aSLionel Sambuc _mm_comge_epi64(__m128i __A, __m128i __B)
710*f4a2713aSLionel Sambuc {
711*f4a2713aSLionel Sambuc return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
712*f4a2713aSLionel Sambuc }
713*f4a2713aSLionel Sambuc
714*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epi64(__m128i __A,__m128i __B)715*f4a2713aSLionel Sambuc _mm_comeq_epi64(__m128i __A, __m128i __B)
716*f4a2713aSLionel Sambuc {
717*f4a2713aSLionel Sambuc return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
718*f4a2713aSLionel Sambuc }
719*f4a2713aSLionel Sambuc
720*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epi64(__m128i __A,__m128i __B)721*f4a2713aSLionel Sambuc _mm_comneq_epi64(__m128i __A, __m128i __B)
722*f4a2713aSLionel Sambuc {
723*f4a2713aSLionel Sambuc return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
724*f4a2713aSLionel Sambuc }
725*f4a2713aSLionel Sambuc
726*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epi64(__m128i __A,__m128i __B)727*f4a2713aSLionel Sambuc _mm_comfalse_epi64(__m128i __A, __m128i __B)
728*f4a2713aSLionel Sambuc {
729*f4a2713aSLionel Sambuc return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
730*f4a2713aSLionel Sambuc }
731*f4a2713aSLionel Sambuc
732*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epi64(__m128i __A,__m128i __B)733*f4a2713aSLionel Sambuc _mm_comtrue_epi64(__m128i __A, __m128i __B)
734*f4a2713aSLionel Sambuc {
735*f4a2713aSLionel Sambuc return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
736*f4a2713aSLionel Sambuc }
737*f4a2713aSLionel Sambuc
/* Statement-expression macro. X and Y are copied into __m128d temporaries and
 * C into an __m128i temporary (so each is evaluated once, in that order),
 * then __builtin_ia32_vpermil2pd is invoked on them with I as the final
 * operand. The macro's value is the builtin's result cast to __m128d.
 * NOTE(review): I is presumably required to be a compile-time constant, and
 * the operation is presumably the XOP VPERMIL2PD two-source permute --
 * confirm against the builtin's documentation. */
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
  __m128d __X = (X); \
  __m128d __Y = (Y); \
  __m128i __C = (C); \
  (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, \
                                     (__v2df)__Y, \
                                     (__v2di)__C, \
                                     (I)); })
744*f4a2713aSLionel Sambuc
/* Statement-expression macro. X and Y are copied into __m256d temporaries and
 * C into an __m256i temporary (so each is evaluated once, in that order),
 * then __builtin_ia32_vpermil2pd256 is invoked on them with I as the final
 * operand. The macro's value is the builtin's result cast to __m256d.
 * NOTE(review): I is presumably required to be a compile-time constant, and
 * the operation is presumably the 256-bit XOP VPERMIL2PD two-source permute
 * -- confirm against the builtin's documentation. */
#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
  __m256d __X = (X); \
  __m256d __Y = (Y); \
  __m256i __C = (C); \
  (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, \
                                        (__v4df)__Y, \
                                        (__v4di)__C, \
                                        (I)); })
751*f4a2713aSLionel Sambuc
/* Statement-expression macro. X and Y are copied into __m128 temporaries and
 * C into an __m128i temporary (so each is evaluated once, in that order),
 * then __builtin_ia32_vpermil2ps is invoked on them with I as the final
 * operand. The macro's value is the builtin's result cast to __m128.
 * NOTE(review): I is presumably required to be a compile-time constant, and
 * the operation is presumably the XOP VPERMIL2PS two-source permute --
 * confirm against the builtin's documentation. */
#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
  __m128 __X = (X); \
  __m128 __Y = (Y); \
  __m128i __C = (C); \
  (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, \
                                    (__v4sf)__Y, \
                                    (__v4si)__C, \
                                    (I)); })
758*f4a2713aSLionel Sambuc
/* Statement-expression macro. X and Y are copied into __m256 temporaries and
 * C into an __m256i temporary (so each is evaluated once, in that order),
 * then __builtin_ia32_vpermil2ps256 is invoked on them with I as the final
 * operand. The macro's value is the builtin's result cast to __m256.
 * NOTE(review): I is presumably required to be a compile-time constant, and
 * the operation is presumably the 256-bit XOP VPERMIL2PS two-source permute
 * -- confirm against the builtin's documentation. */
#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
  __m256 __X = (X); \
  __m256 __Y = (Y); \
  __m256i __C = (C); \
  (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, \
                                       (__v8sf)__Y, \
                                       (__v8si)__C, \
                                       (I)); })
765*f4a2713aSLionel Sambuc
766*f4a2713aSLionel Sambuc static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_frcz_ss(__m128 __A)767*f4a2713aSLionel Sambuc _mm_frcz_ss(__m128 __A)
768*f4a2713aSLionel Sambuc {
769*f4a2713aSLionel Sambuc return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
770*f4a2713aSLionel Sambuc }
771*f4a2713aSLionel Sambuc
772*f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_frcz_sd(__m128d __A)773*f4a2713aSLionel Sambuc _mm_frcz_sd(__m128d __A)
774*f4a2713aSLionel Sambuc {
775*f4a2713aSLionel Sambuc return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
776*f4a2713aSLionel Sambuc }
777*f4a2713aSLionel Sambuc
778*f4a2713aSLionel Sambuc static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_frcz_ps(__m128 __A)779*f4a2713aSLionel Sambuc _mm_frcz_ps(__m128 __A)
780*f4a2713aSLionel Sambuc {
781*f4a2713aSLionel Sambuc return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
782*f4a2713aSLionel Sambuc }
783*f4a2713aSLionel Sambuc
784*f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_frcz_pd(__m128d __A)785*f4a2713aSLionel Sambuc _mm_frcz_pd(__m128d __A)
786*f4a2713aSLionel Sambuc {
787*f4a2713aSLionel Sambuc return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
788*f4a2713aSLionel Sambuc }
789*f4a2713aSLionel Sambuc
790*f4a2713aSLionel Sambuc static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_frcz_ps(__m256 __A)791*f4a2713aSLionel Sambuc _mm256_frcz_ps(__m256 __A)
792*f4a2713aSLionel Sambuc {
793*f4a2713aSLionel Sambuc return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
794*f4a2713aSLionel Sambuc }
795*f4a2713aSLionel Sambuc
796*f4a2713aSLionel Sambuc static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_frcz_pd(__m256d __A)797*f4a2713aSLionel Sambuc _mm256_frcz_pd(__m256d __A)
798*f4a2713aSLionel Sambuc {
799*f4a2713aSLionel Sambuc return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
800*f4a2713aSLionel Sambuc }
801*f4a2713aSLionel Sambuc
802*f4a2713aSLionel Sambuc #endif /* __XOP__ */
803*f4a2713aSLionel Sambuc
804*f4a2713aSLionel Sambuc #endif /* __XOPINTRIN_H */
805