/*===---- xopintrin.h - XOP intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9*7330f729Sjoerg
10*7330f729Sjoerg #ifndef __X86INTRIN_H
11*7330f729Sjoerg #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
12*7330f729Sjoerg #endif
13*7330f729Sjoerg
14*7330f729Sjoerg #ifndef __XOPINTRIN_H
15*7330f729Sjoerg #define __XOPINTRIN_H
16*7330f729Sjoerg
17*7330f729Sjoerg #include <fma4intrin.h>
18*7330f729Sjoerg
/* Default attributes for the functions in this file: always inlined, no debug
 * info, compiled for the "xop" target feature. The two variants differ only
 * in the declared minimum vector width (128 vs. 256). */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256)))
22*7330f729Sjoerg
23*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A,__m128i __B,__m128i __C)24*7330f729Sjoerg _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
25*7330f729Sjoerg {
26*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
27*7330f729Sjoerg }
28*7330f729Sjoerg
29*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi16(__m128i __A,__m128i __B,__m128i __C)30*7330f729Sjoerg _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
31*7330f729Sjoerg {
32*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
33*7330f729Sjoerg }
34*7330f729Sjoerg
35*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccsd_epi16(__m128i __A,__m128i __B,__m128i __C)36*7330f729Sjoerg _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
37*7330f729Sjoerg {
38*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
39*7330f729Sjoerg }
40*7330f729Sjoerg
41*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccd_epi16(__m128i __A,__m128i __B,__m128i __C)42*7330f729Sjoerg _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
43*7330f729Sjoerg {
44*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
45*7330f729Sjoerg }
46*7330f729Sjoerg
47*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi32(__m128i __A,__m128i __B,__m128i __C)48*7330f729Sjoerg _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
49*7330f729Sjoerg {
50*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
51*7330f729Sjoerg }
52*7330f729Sjoerg
53*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi32(__m128i __A,__m128i __B,__m128i __C)54*7330f729Sjoerg _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
55*7330f729Sjoerg {
56*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
57*7330f729Sjoerg }
58*7330f729Sjoerg
59*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccslo_epi32(__m128i __A,__m128i __B,__m128i __C)60*7330f729Sjoerg _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
61*7330f729Sjoerg {
62*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
63*7330f729Sjoerg }
64*7330f729Sjoerg
65*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macclo_epi32(__m128i __A,__m128i __B,__m128i __C)66*7330f729Sjoerg _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
67*7330f729Sjoerg {
68*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
69*7330f729Sjoerg }
70*7330f729Sjoerg
71*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccshi_epi32(__m128i __A,__m128i __B,__m128i __C)72*7330f729Sjoerg _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
73*7330f729Sjoerg {
74*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
75*7330f729Sjoerg }
76*7330f729Sjoerg
77*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macchi_epi32(__m128i __A,__m128i __B,__m128i __C)78*7330f729Sjoerg _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
79*7330f729Sjoerg {
80*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
81*7330f729Sjoerg }
82*7330f729Sjoerg
83*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddsd_epi16(__m128i __A,__m128i __B,__m128i __C)84*7330f729Sjoerg _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
85*7330f729Sjoerg {
86*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
87*7330f729Sjoerg }
88*7330f729Sjoerg
89*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddd_epi16(__m128i __A,__m128i __B,__m128i __C)90*7330f729Sjoerg _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
91*7330f729Sjoerg {
92*7330f729Sjoerg return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
93*7330f729Sjoerg }
94*7330f729Sjoerg
95*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epi8(__m128i __A)96*7330f729Sjoerg _mm_haddw_epi8(__m128i __A)
97*7330f729Sjoerg {
98*7330f729Sjoerg return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
99*7330f729Sjoerg }
100*7330f729Sjoerg
101*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi8(__m128i __A)102*7330f729Sjoerg _mm_haddd_epi8(__m128i __A)
103*7330f729Sjoerg {
104*7330f729Sjoerg return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
105*7330f729Sjoerg }
106*7330f729Sjoerg
107*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi8(__m128i __A)108*7330f729Sjoerg _mm_haddq_epi8(__m128i __A)
109*7330f729Sjoerg {
110*7330f729Sjoerg return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
111*7330f729Sjoerg }
112*7330f729Sjoerg
113*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi16(__m128i __A)114*7330f729Sjoerg _mm_haddd_epi16(__m128i __A)
115*7330f729Sjoerg {
116*7330f729Sjoerg return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
117*7330f729Sjoerg }
118*7330f729Sjoerg
119*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi16(__m128i __A)120*7330f729Sjoerg _mm_haddq_epi16(__m128i __A)
121*7330f729Sjoerg {
122*7330f729Sjoerg return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
123*7330f729Sjoerg }
124*7330f729Sjoerg
125*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi32(__m128i __A)126*7330f729Sjoerg _mm_haddq_epi32(__m128i __A)
127*7330f729Sjoerg {
128*7330f729Sjoerg return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
129*7330f729Sjoerg }
130*7330f729Sjoerg
131*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epu8(__m128i __A)132*7330f729Sjoerg _mm_haddw_epu8(__m128i __A)
133*7330f729Sjoerg {
134*7330f729Sjoerg return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
135*7330f729Sjoerg }
136*7330f729Sjoerg
137*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu8(__m128i __A)138*7330f729Sjoerg _mm_haddd_epu8(__m128i __A)
139*7330f729Sjoerg {
140*7330f729Sjoerg return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
141*7330f729Sjoerg }
142*7330f729Sjoerg
143*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu8(__m128i __A)144*7330f729Sjoerg _mm_haddq_epu8(__m128i __A)
145*7330f729Sjoerg {
146*7330f729Sjoerg return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
147*7330f729Sjoerg }
148*7330f729Sjoerg
149*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu16(__m128i __A)150*7330f729Sjoerg _mm_haddd_epu16(__m128i __A)
151*7330f729Sjoerg {
152*7330f729Sjoerg return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
153*7330f729Sjoerg }
154*7330f729Sjoerg
155*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu16(__m128i __A)156*7330f729Sjoerg _mm_haddq_epu16(__m128i __A)
157*7330f729Sjoerg {
158*7330f729Sjoerg return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
159*7330f729Sjoerg }
160*7330f729Sjoerg
161*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu32(__m128i __A)162*7330f729Sjoerg _mm_haddq_epu32(__m128i __A)
163*7330f729Sjoerg {
164*7330f729Sjoerg return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
165*7330f729Sjoerg }
166*7330f729Sjoerg
167*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubw_epi8(__m128i __A)168*7330f729Sjoerg _mm_hsubw_epi8(__m128i __A)
169*7330f729Sjoerg {
170*7330f729Sjoerg return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
171*7330f729Sjoerg }
172*7330f729Sjoerg
173*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubd_epi16(__m128i __A)174*7330f729Sjoerg _mm_hsubd_epi16(__m128i __A)
175*7330f729Sjoerg {
176*7330f729Sjoerg return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
177*7330f729Sjoerg }
178*7330f729Sjoerg
179*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubq_epi32(__m128i __A)180*7330f729Sjoerg _mm_hsubq_epi32(__m128i __A)
181*7330f729Sjoerg {
182*7330f729Sjoerg return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
183*7330f729Sjoerg }
184*7330f729Sjoerg
185*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmov_si128(__m128i __A,__m128i __B,__m128i __C)186*7330f729Sjoerg _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
187*7330f729Sjoerg {
188*7330f729Sjoerg return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));
189*7330f729Sjoerg }
190*7330f729Sjoerg
191*7330f729Sjoerg static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cmov_si256(__m256i __A,__m256i __B,__m256i __C)192*7330f729Sjoerg _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
193*7330f729Sjoerg {
194*7330f729Sjoerg return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));
195*7330f729Sjoerg }
196*7330f729Sjoerg
197*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_perm_epi8(__m128i __A,__m128i __B,__m128i __C)198*7330f729Sjoerg _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
199*7330f729Sjoerg {
200*7330f729Sjoerg return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
201*7330f729Sjoerg }
202*7330f729Sjoerg
203*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi8(__m128i __A,__m128i __B)204*7330f729Sjoerg _mm_rot_epi8(__m128i __A, __m128i __B)
205*7330f729Sjoerg {
206*7330f729Sjoerg return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
207*7330f729Sjoerg }
208*7330f729Sjoerg
209*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi16(__m128i __A,__m128i __B)210*7330f729Sjoerg _mm_rot_epi16(__m128i __A, __m128i __B)
211*7330f729Sjoerg {
212*7330f729Sjoerg return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
213*7330f729Sjoerg }
214*7330f729Sjoerg
215*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi32(__m128i __A,__m128i __B)216*7330f729Sjoerg _mm_rot_epi32(__m128i __A, __m128i __B)
217*7330f729Sjoerg {
218*7330f729Sjoerg return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
219*7330f729Sjoerg }
220*7330f729Sjoerg
221*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi64(__m128i __A,__m128i __B)222*7330f729Sjoerg _mm_rot_epi64(__m128i __A, __m128i __B)
223*7330f729Sjoerg {
224*7330f729Sjoerg return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
225*7330f729Sjoerg }
226*7330f729Sjoerg
/* Expands to __builtin_ia32_vprotbi applied to A (viewed as __v16qi) and N,
 * with the result cast to __m128i. The whole expansion is parenthesized so a
 * postfix operator applied to the macro result binds to the cast value.
 * NOTE(review): N is presumably required to be a constant expression -- confirm. */
#define _mm_roti_epi8(A, N) \
  ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))
229*7330f729Sjoerg
/* Expands to __builtin_ia32_vprotwi applied to A (viewed as __v8hi) and N,
 * with the result cast to __m128i. The whole expansion is parenthesized so a
 * postfix operator applied to the macro result binds to the cast value.
 * NOTE(review): N is presumably required to be a constant expression -- confirm. */
#define _mm_roti_epi16(A, N) \
  ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))
232*7330f729Sjoerg
/* Expands to __builtin_ia32_vprotdi applied to A (viewed as __v4si) and N,
 * with the result cast to __m128i. The whole expansion is parenthesized so a
 * postfix operator applied to the macro result binds to the cast value.
 * NOTE(review): N is presumably required to be a constant expression -- confirm. */
#define _mm_roti_epi32(A, N) \
  ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))
235*7330f729Sjoerg
/* Expands to __builtin_ia32_vprotqi applied to A (viewed as __v2di) and N,
 * with the result cast to __m128i. The whole expansion is parenthesized so a
 * postfix operator applied to the macro result binds to the cast value.
 * NOTE(review): N is presumably required to be a constant expression -- confirm. */
#define _mm_roti_epi64(A, N) \
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))
238*7330f729Sjoerg
239*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A,__m128i __B)240*7330f729Sjoerg _mm_shl_epi8(__m128i __A, __m128i __B)
241*7330f729Sjoerg {
242*7330f729Sjoerg return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
243*7330f729Sjoerg }
244*7330f729Sjoerg
245*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi16(__m128i __A,__m128i __B)246*7330f729Sjoerg _mm_shl_epi16(__m128i __A, __m128i __B)
247*7330f729Sjoerg {
248*7330f729Sjoerg return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
249*7330f729Sjoerg }
250*7330f729Sjoerg
251*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi32(__m128i __A,__m128i __B)252*7330f729Sjoerg _mm_shl_epi32(__m128i __A, __m128i __B)
253*7330f729Sjoerg {
254*7330f729Sjoerg return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
255*7330f729Sjoerg }
256*7330f729Sjoerg
257*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi64(__m128i __A,__m128i __B)258*7330f729Sjoerg _mm_shl_epi64(__m128i __A, __m128i __B)
259*7330f729Sjoerg {
260*7330f729Sjoerg return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
261*7330f729Sjoerg }
262*7330f729Sjoerg
263*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi8(__m128i __A,__m128i __B)264*7330f729Sjoerg _mm_sha_epi8(__m128i __A, __m128i __B)
265*7330f729Sjoerg {
266*7330f729Sjoerg return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
267*7330f729Sjoerg }
268*7330f729Sjoerg
269*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi16(__m128i __A,__m128i __B)270*7330f729Sjoerg _mm_sha_epi16(__m128i __A, __m128i __B)
271*7330f729Sjoerg {
272*7330f729Sjoerg return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
273*7330f729Sjoerg }
274*7330f729Sjoerg
275*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi32(__m128i __A,__m128i __B)276*7330f729Sjoerg _mm_sha_epi32(__m128i __A, __m128i __B)
277*7330f729Sjoerg {
278*7330f729Sjoerg return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
279*7330f729Sjoerg }
280*7330f729Sjoerg
281*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi64(__m128i __A,__m128i __B)282*7330f729Sjoerg _mm_sha_epi64(__m128i __A, __m128i __B)
283*7330f729Sjoerg {
284*7330f729Sjoerg return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
285*7330f729Sjoerg }
286*7330f729Sjoerg
/* Expands to __builtin_ia32_vpcomub applied to A and B (viewed as __v16qi)
 * and the selector N, with the result cast to __m128i. The whole expansion
 * is parenthesized so it behaves as a single primary expression.
 * NOTE(review): N presumably takes one of the _MM_PCOMCTRL_* values -- confirm. */
#define _mm_com_epu8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), (N)))
290*7330f729Sjoerg
/* Expands to __builtin_ia32_vpcomuw applied to A and B (viewed as __v8hi)
 * and the selector N, with the result cast to __m128i. The whole expansion
 * is parenthesized so it behaves as a single primary expression.
 * NOTE(review): N presumably takes one of the _MM_PCOMCTRL_* values -- confirm. */
#define _mm_com_epu16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), (N)))
294*7330f729Sjoerg
/* Expands to __builtin_ia32_vpcomud applied to A and B (viewed as __v4si)
 * and the selector N, with the result cast to __m128i. The whole expansion
 * is parenthesized so it behaves as a single primary expression.
 * NOTE(review): N presumably takes one of the _MM_PCOMCTRL_* values -- confirm. */
#define _mm_com_epu32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), (N)))
298*7330f729Sjoerg
/* Expands to __builtin_ia32_vpcomuq applied to A and B (viewed as __v2di)
 * and the selector N, with the result cast to __m128i. The whole expansion
 * is parenthesized so it behaves as a single primary expression.
 * NOTE(review): N presumably takes one of the _MM_PCOMCTRL_* values -- confirm. */
#define _mm_com_epu64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), (N)))
302*7330f729Sjoerg
/* Expands to __builtin_ia32_vpcomb applied to A and B (viewed as __v16qi)
 * and the selector N, with the result cast to __m128i. The whole expansion
 * is parenthesized so it behaves as a single primary expression.
 * NOTE(review): N presumably takes one of the _MM_PCOMCTRL_* values -- confirm. */
#define _mm_com_epi8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)))
306*7330f729Sjoerg
/* Expands to __builtin_ia32_vpcomw applied to A and B (viewed as __v8hi)
 * and the selector N, with the result cast to __m128i. The whole expansion
 * is parenthesized so it behaves as a single primary expression.
 * NOTE(review): N presumably takes one of the _MM_PCOMCTRL_* values -- confirm. */
#define _mm_com_epi16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)))
310*7330f729Sjoerg
/* Expands to __builtin_ia32_vpcomd applied to A and B (viewed as __v4si)
 * and the selector N, with the result cast to __m128i. The whole expansion
 * is parenthesized so it behaves as a single primary expression.
 * NOTE(review): N presumably takes one of the _MM_PCOMCTRL_* values -- confirm. */
#define _mm_com_epi32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)))
314*7330f729Sjoerg
/* Expands to __builtin_ia32_vpcomq applied to A and B (viewed as __v2di)
 * and the selector N, with the result cast to __m128i. The whole expansion
 * is parenthesized so it behaves as a single primary expression.
 * NOTE(review): N presumably takes one of the _MM_PCOMCTRL_* values -- confirm. */
#define _mm_com_epi64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)))
318*7330f729Sjoerg
/* Comparison selectors passed as the third argument of the _mm_com_* macros
 * by the _mm_com<op>_* wrappers in this file. Kept as object-like macros so
 * they remain usable in preprocessor conditionals. */
#define _MM_PCOMCTRL_LT    0
#define _MM_PCOMCTRL_LE    1
#define _MM_PCOMCTRL_GT    2
#define _MM_PCOMCTRL_GE    3
#define _MM_PCOMCTRL_EQ    4
#define _MM_PCOMCTRL_NEQ   5
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE  7
327*7330f729Sjoerg
328*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu8(__m128i __A,__m128i __B)329*7330f729Sjoerg _mm_comlt_epu8(__m128i __A, __m128i __B)
330*7330f729Sjoerg {
331*7330f729Sjoerg return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
332*7330f729Sjoerg }
333*7330f729Sjoerg
334*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu8(__m128i __A,__m128i __B)335*7330f729Sjoerg _mm_comle_epu8(__m128i __A, __m128i __B)
336*7330f729Sjoerg {
337*7330f729Sjoerg return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
338*7330f729Sjoerg }
339*7330f729Sjoerg
340*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu8(__m128i __A,__m128i __B)341*7330f729Sjoerg _mm_comgt_epu8(__m128i __A, __m128i __B)
342*7330f729Sjoerg {
343*7330f729Sjoerg return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
344*7330f729Sjoerg }
345*7330f729Sjoerg
346*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu8(__m128i __A,__m128i __B)347*7330f729Sjoerg _mm_comge_epu8(__m128i __A, __m128i __B)
348*7330f729Sjoerg {
349*7330f729Sjoerg return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
350*7330f729Sjoerg }
351*7330f729Sjoerg
352*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu8(__m128i __A,__m128i __B)353*7330f729Sjoerg _mm_comeq_epu8(__m128i __A, __m128i __B)
354*7330f729Sjoerg {
355*7330f729Sjoerg return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
356*7330f729Sjoerg }
357*7330f729Sjoerg
358*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu8(__m128i __A,__m128i __B)359*7330f729Sjoerg _mm_comneq_epu8(__m128i __A, __m128i __B)
360*7330f729Sjoerg {
361*7330f729Sjoerg return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
362*7330f729Sjoerg }
363*7330f729Sjoerg
364*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu8(__m128i __A,__m128i __B)365*7330f729Sjoerg _mm_comfalse_epu8(__m128i __A, __m128i __B)
366*7330f729Sjoerg {
367*7330f729Sjoerg return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
368*7330f729Sjoerg }
369*7330f729Sjoerg
370*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu8(__m128i __A,__m128i __B)371*7330f729Sjoerg _mm_comtrue_epu8(__m128i __A, __m128i __B)
372*7330f729Sjoerg {
373*7330f729Sjoerg return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
374*7330f729Sjoerg }
375*7330f729Sjoerg
376*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu16(__m128i __A,__m128i __B)377*7330f729Sjoerg _mm_comlt_epu16(__m128i __A, __m128i __B)
378*7330f729Sjoerg {
379*7330f729Sjoerg return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
380*7330f729Sjoerg }
381*7330f729Sjoerg
382*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu16(__m128i __A,__m128i __B)383*7330f729Sjoerg _mm_comle_epu16(__m128i __A, __m128i __B)
384*7330f729Sjoerg {
385*7330f729Sjoerg return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
386*7330f729Sjoerg }
387*7330f729Sjoerg
388*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu16(__m128i __A,__m128i __B)389*7330f729Sjoerg _mm_comgt_epu16(__m128i __A, __m128i __B)
390*7330f729Sjoerg {
391*7330f729Sjoerg return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
392*7330f729Sjoerg }
393*7330f729Sjoerg
394*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu16(__m128i __A,__m128i __B)395*7330f729Sjoerg _mm_comge_epu16(__m128i __A, __m128i __B)
396*7330f729Sjoerg {
397*7330f729Sjoerg return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
398*7330f729Sjoerg }
399*7330f729Sjoerg
400*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu16(__m128i __A,__m128i __B)401*7330f729Sjoerg _mm_comeq_epu16(__m128i __A, __m128i __B)
402*7330f729Sjoerg {
403*7330f729Sjoerg return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
404*7330f729Sjoerg }
405*7330f729Sjoerg
406*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu16(__m128i __A,__m128i __B)407*7330f729Sjoerg _mm_comneq_epu16(__m128i __A, __m128i __B)
408*7330f729Sjoerg {
409*7330f729Sjoerg return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
410*7330f729Sjoerg }
411*7330f729Sjoerg
412*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu16(__m128i __A,__m128i __B)413*7330f729Sjoerg _mm_comfalse_epu16(__m128i __A, __m128i __B)
414*7330f729Sjoerg {
415*7330f729Sjoerg return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
416*7330f729Sjoerg }
417*7330f729Sjoerg
418*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu16(__m128i __A,__m128i __B)419*7330f729Sjoerg _mm_comtrue_epu16(__m128i __A, __m128i __B)
420*7330f729Sjoerg {
421*7330f729Sjoerg return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
422*7330f729Sjoerg }
423*7330f729Sjoerg
424*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu32(__m128i __A,__m128i __B)425*7330f729Sjoerg _mm_comlt_epu32(__m128i __A, __m128i __B)
426*7330f729Sjoerg {
427*7330f729Sjoerg return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
428*7330f729Sjoerg }
429*7330f729Sjoerg
430*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu32(__m128i __A,__m128i __B)431*7330f729Sjoerg _mm_comle_epu32(__m128i __A, __m128i __B)
432*7330f729Sjoerg {
433*7330f729Sjoerg return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
434*7330f729Sjoerg }
435*7330f729Sjoerg
436*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu32(__m128i __A,__m128i __B)437*7330f729Sjoerg _mm_comgt_epu32(__m128i __A, __m128i __B)
438*7330f729Sjoerg {
439*7330f729Sjoerg return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
440*7330f729Sjoerg }
441*7330f729Sjoerg
442*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu32(__m128i __A,__m128i __B)443*7330f729Sjoerg _mm_comge_epu32(__m128i __A, __m128i __B)
444*7330f729Sjoerg {
445*7330f729Sjoerg return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
446*7330f729Sjoerg }
447*7330f729Sjoerg
448*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu32(__m128i __A,__m128i __B)449*7330f729Sjoerg _mm_comeq_epu32(__m128i __A, __m128i __B)
450*7330f729Sjoerg {
451*7330f729Sjoerg return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
452*7330f729Sjoerg }
453*7330f729Sjoerg
454*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu32(__m128i __A,__m128i __B)455*7330f729Sjoerg _mm_comneq_epu32(__m128i __A, __m128i __B)
456*7330f729Sjoerg {
457*7330f729Sjoerg return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
458*7330f729Sjoerg }
459*7330f729Sjoerg
460*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu32(__m128i __A,__m128i __B)461*7330f729Sjoerg _mm_comfalse_epu32(__m128i __A, __m128i __B)
462*7330f729Sjoerg {
463*7330f729Sjoerg return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
464*7330f729Sjoerg }
465*7330f729Sjoerg
466*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu32(__m128i __A,__m128i __B)467*7330f729Sjoerg _mm_comtrue_epu32(__m128i __A, __m128i __B)
468*7330f729Sjoerg {
469*7330f729Sjoerg return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
470*7330f729Sjoerg }
471*7330f729Sjoerg
472*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu64(__m128i __A,__m128i __B)473*7330f729Sjoerg _mm_comlt_epu64(__m128i __A, __m128i __B)
474*7330f729Sjoerg {
475*7330f729Sjoerg return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
476*7330f729Sjoerg }
477*7330f729Sjoerg
478*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu64(__m128i __A,__m128i __B)479*7330f729Sjoerg _mm_comle_epu64(__m128i __A, __m128i __B)
480*7330f729Sjoerg {
481*7330f729Sjoerg return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
482*7330f729Sjoerg }
483*7330f729Sjoerg
484*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu64(__m128i __A,__m128i __B)485*7330f729Sjoerg _mm_comgt_epu64(__m128i __A, __m128i __B)
486*7330f729Sjoerg {
487*7330f729Sjoerg return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
488*7330f729Sjoerg }
489*7330f729Sjoerg
490*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu64(__m128i __A,__m128i __B)491*7330f729Sjoerg _mm_comge_epu64(__m128i __A, __m128i __B)
492*7330f729Sjoerg {
493*7330f729Sjoerg return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
494*7330f729Sjoerg }
495*7330f729Sjoerg
496*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu64(__m128i __A,__m128i __B)497*7330f729Sjoerg _mm_comeq_epu64(__m128i __A, __m128i __B)
498*7330f729Sjoerg {
499*7330f729Sjoerg return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
500*7330f729Sjoerg }
501*7330f729Sjoerg
502*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu64(__m128i __A,__m128i __B)503*7330f729Sjoerg _mm_comneq_epu64(__m128i __A, __m128i __B)
504*7330f729Sjoerg {
505*7330f729Sjoerg return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
506*7330f729Sjoerg }
507*7330f729Sjoerg
508*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu64(__m128i __A,__m128i __B)509*7330f729Sjoerg _mm_comfalse_epu64(__m128i __A, __m128i __B)
510*7330f729Sjoerg {
511*7330f729Sjoerg return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
512*7330f729Sjoerg }
513*7330f729Sjoerg
514*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu64(__m128i __A,__m128i __B)515*7330f729Sjoerg _mm_comtrue_epu64(__m128i __A, __m128i __B)
516*7330f729Sjoerg {
517*7330f729Sjoerg return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
518*7330f729Sjoerg }
519*7330f729Sjoerg
520*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi8(__m128i __A,__m128i __B)521*7330f729Sjoerg _mm_comlt_epi8(__m128i __A, __m128i __B)
522*7330f729Sjoerg {
523*7330f729Sjoerg return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
524*7330f729Sjoerg }
525*7330f729Sjoerg
526*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi8(__m128i __A,__m128i __B)527*7330f729Sjoerg _mm_comle_epi8(__m128i __A, __m128i __B)
528*7330f729Sjoerg {
529*7330f729Sjoerg return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
530*7330f729Sjoerg }
531*7330f729Sjoerg
532*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi8(__m128i __A,__m128i __B)533*7330f729Sjoerg _mm_comgt_epi8(__m128i __A, __m128i __B)
534*7330f729Sjoerg {
535*7330f729Sjoerg return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
536*7330f729Sjoerg }
537*7330f729Sjoerg
538*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi8(__m128i __A,__m128i __B)539*7330f729Sjoerg _mm_comge_epi8(__m128i __A, __m128i __B)
540*7330f729Sjoerg {
541*7330f729Sjoerg return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
542*7330f729Sjoerg }
543*7330f729Sjoerg
544*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi8(__m128i __A,__m128i __B)545*7330f729Sjoerg _mm_comeq_epi8(__m128i __A, __m128i __B)
546*7330f729Sjoerg {
547*7330f729Sjoerg return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
548*7330f729Sjoerg }
549*7330f729Sjoerg
550*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi8(__m128i __A,__m128i __B)551*7330f729Sjoerg _mm_comneq_epi8(__m128i __A, __m128i __B)
552*7330f729Sjoerg {
553*7330f729Sjoerg return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
554*7330f729Sjoerg }
555*7330f729Sjoerg
556*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi8(__m128i __A,__m128i __B)557*7330f729Sjoerg _mm_comfalse_epi8(__m128i __A, __m128i __B)
558*7330f729Sjoerg {
559*7330f729Sjoerg return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
560*7330f729Sjoerg }
561*7330f729Sjoerg
562*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi8(__m128i __A,__m128i __B)563*7330f729Sjoerg _mm_comtrue_epi8(__m128i __A, __m128i __B)
564*7330f729Sjoerg {
565*7330f729Sjoerg return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
566*7330f729Sjoerg }
567*7330f729Sjoerg
568*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi16(__m128i __A,__m128i __B)569*7330f729Sjoerg _mm_comlt_epi16(__m128i __A, __m128i __B)
570*7330f729Sjoerg {
571*7330f729Sjoerg return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
572*7330f729Sjoerg }
573*7330f729Sjoerg
574*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi16(__m128i __A,__m128i __B)575*7330f729Sjoerg _mm_comle_epi16(__m128i __A, __m128i __B)
576*7330f729Sjoerg {
577*7330f729Sjoerg return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
578*7330f729Sjoerg }
579*7330f729Sjoerg
580*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi16(__m128i __A,__m128i __B)581*7330f729Sjoerg _mm_comgt_epi16(__m128i __A, __m128i __B)
582*7330f729Sjoerg {
583*7330f729Sjoerg return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
584*7330f729Sjoerg }
585*7330f729Sjoerg
586*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi16(__m128i __A,__m128i __B)587*7330f729Sjoerg _mm_comge_epi16(__m128i __A, __m128i __B)
588*7330f729Sjoerg {
589*7330f729Sjoerg return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
590*7330f729Sjoerg }
591*7330f729Sjoerg
592*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi16(__m128i __A,__m128i __B)593*7330f729Sjoerg _mm_comeq_epi16(__m128i __A, __m128i __B)
594*7330f729Sjoerg {
595*7330f729Sjoerg return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
596*7330f729Sjoerg }
597*7330f729Sjoerg
598*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi16(__m128i __A,__m128i __B)599*7330f729Sjoerg _mm_comneq_epi16(__m128i __A, __m128i __B)
600*7330f729Sjoerg {
601*7330f729Sjoerg return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
602*7330f729Sjoerg }
603*7330f729Sjoerg
604*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi16(__m128i __A,__m128i __B)605*7330f729Sjoerg _mm_comfalse_epi16(__m128i __A, __m128i __B)
606*7330f729Sjoerg {
607*7330f729Sjoerg return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
608*7330f729Sjoerg }
609*7330f729Sjoerg
610*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi16(__m128i __A,__m128i __B)611*7330f729Sjoerg _mm_comtrue_epi16(__m128i __A, __m128i __B)
612*7330f729Sjoerg {
613*7330f729Sjoerg return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
614*7330f729Sjoerg }
615*7330f729Sjoerg
616*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi32(__m128i __A,__m128i __B)617*7330f729Sjoerg _mm_comlt_epi32(__m128i __A, __m128i __B)
618*7330f729Sjoerg {
619*7330f729Sjoerg return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
620*7330f729Sjoerg }
621*7330f729Sjoerg
622*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi32(__m128i __A,__m128i __B)623*7330f729Sjoerg _mm_comle_epi32(__m128i __A, __m128i __B)
624*7330f729Sjoerg {
625*7330f729Sjoerg return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
626*7330f729Sjoerg }
627*7330f729Sjoerg
628*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi32(__m128i __A,__m128i __B)629*7330f729Sjoerg _mm_comgt_epi32(__m128i __A, __m128i __B)
630*7330f729Sjoerg {
631*7330f729Sjoerg return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
632*7330f729Sjoerg }
633*7330f729Sjoerg
634*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi32(__m128i __A,__m128i __B)635*7330f729Sjoerg _mm_comge_epi32(__m128i __A, __m128i __B)
636*7330f729Sjoerg {
637*7330f729Sjoerg return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
638*7330f729Sjoerg }
639*7330f729Sjoerg
640*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi32(__m128i __A,__m128i __B)641*7330f729Sjoerg _mm_comeq_epi32(__m128i __A, __m128i __B)
642*7330f729Sjoerg {
643*7330f729Sjoerg return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
644*7330f729Sjoerg }
645*7330f729Sjoerg
646*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi32(__m128i __A,__m128i __B)647*7330f729Sjoerg _mm_comneq_epi32(__m128i __A, __m128i __B)
648*7330f729Sjoerg {
649*7330f729Sjoerg return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
650*7330f729Sjoerg }
651*7330f729Sjoerg
652*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi32(__m128i __A,__m128i __B)653*7330f729Sjoerg _mm_comfalse_epi32(__m128i __A, __m128i __B)
654*7330f729Sjoerg {
655*7330f729Sjoerg return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
656*7330f729Sjoerg }
657*7330f729Sjoerg
658*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi32(__m128i __A,__m128i __B)659*7330f729Sjoerg _mm_comtrue_epi32(__m128i __A, __m128i __B)
660*7330f729Sjoerg {
661*7330f729Sjoerg return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
662*7330f729Sjoerg }
663*7330f729Sjoerg
664*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi64(__m128i __A,__m128i __B)665*7330f729Sjoerg _mm_comlt_epi64(__m128i __A, __m128i __B)
666*7330f729Sjoerg {
667*7330f729Sjoerg return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
668*7330f729Sjoerg }
669*7330f729Sjoerg
670*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi64(__m128i __A,__m128i __B)671*7330f729Sjoerg _mm_comle_epi64(__m128i __A, __m128i __B)
672*7330f729Sjoerg {
673*7330f729Sjoerg return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
674*7330f729Sjoerg }
675*7330f729Sjoerg
676*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi64(__m128i __A,__m128i __B)677*7330f729Sjoerg _mm_comgt_epi64(__m128i __A, __m128i __B)
678*7330f729Sjoerg {
679*7330f729Sjoerg return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
680*7330f729Sjoerg }
681*7330f729Sjoerg
682*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi64(__m128i __A,__m128i __B)683*7330f729Sjoerg _mm_comge_epi64(__m128i __A, __m128i __B)
684*7330f729Sjoerg {
685*7330f729Sjoerg return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
686*7330f729Sjoerg }
687*7330f729Sjoerg
688*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi64(__m128i __A,__m128i __B)689*7330f729Sjoerg _mm_comeq_epi64(__m128i __A, __m128i __B)
690*7330f729Sjoerg {
691*7330f729Sjoerg return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
692*7330f729Sjoerg }
693*7330f729Sjoerg
694*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi64(__m128i __A,__m128i __B)695*7330f729Sjoerg _mm_comneq_epi64(__m128i __A, __m128i __B)
696*7330f729Sjoerg {
697*7330f729Sjoerg return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
698*7330f729Sjoerg }
699*7330f729Sjoerg
700*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi64(__m128i __A,__m128i __B)701*7330f729Sjoerg _mm_comfalse_epi64(__m128i __A, __m128i __B)
702*7330f729Sjoerg {
703*7330f729Sjoerg return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
704*7330f729Sjoerg }
705*7330f729Sjoerg
706*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi64(__m128i __A,__m128i __B)707*7330f729Sjoerg _mm_comtrue_epi64(__m128i __A, __m128i __B)
708*7330f729Sjoerg {
709*7330f729Sjoerg return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
710*7330f729Sjoerg }
711*7330f729Sjoerg
/* Expands to a call of __builtin_ia32_vpermil2pd with X and Y converted to
 * __v2df, C converted to __v2di, and I passed through unchanged; the result
 * is cast to __m128d.
 *
 * The whole expansion is wrapped in parentheses so that the leading cast
 * always applies to the builtin's result, even when the macro invocation is
 * immediately followed by a postfix operator such as a subscript. */
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), \
                                      (__v2di)(__m128i)(C), (I)))
716*7330f729Sjoerg
/* Expands to a call of __builtin_ia32_vpermil2pd256 with X and Y converted to
 * __v4df, C converted to __v4di, and I passed through unchanged; the result
 * is cast to __m256d.
 *
 * The whole expansion is wrapped in parentheses so that the leading cast
 * always applies to the builtin's result, even when the macro invocation is
 * immediately followed by a postfix operator such as a subscript. */
#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                         (__v4df)(__m256d)(Y), \
                                         (__v4di)(__m256i)(C), (I)))
721*7330f729Sjoerg
/* Expands to a call of __builtin_ia32_vpermil2ps with X and Y converted to
 * __v4sf, C converted to __v4si, and I passed through unchanged; the result
 * is cast to __m128.
 *
 * The whole expansion is wrapped in parentheses so that the leading cast
 * always applies to the builtin's result, even when the macro invocation is
 * immediately followed by a postfix operator such as a subscript. */
#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), \
                                     (__v4sf)(__m128)(Y), \
                                     (__v4si)(__m128i)(C), (I)))
725*7330f729Sjoerg
/* Expands to a call of __builtin_ia32_vpermil2ps256 with X and Y converted to
 * __v8sf, C converted to __v8si, and I passed through unchanged; the result
 * is cast to __m256.
 *
 * The whole expansion is wrapped in parentheses so that the leading cast
 * always applies to the builtin's result, even when the macro invocation is
 * immediately followed by a postfix operator such as a subscript. */
#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                        (__v8sf)(__m256)(Y), \
                                        (__v8si)(__m256i)(C), (I)))
730*7330f729Sjoerg
731*7330f729Sjoerg static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)732*7330f729Sjoerg _mm_frcz_ss(__m128 __A)
733*7330f729Sjoerg {
734*7330f729Sjoerg return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
735*7330f729Sjoerg }
736*7330f729Sjoerg
737*7330f729Sjoerg static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_sd(__m128d __A)738*7330f729Sjoerg _mm_frcz_sd(__m128d __A)
739*7330f729Sjoerg {
740*7330f729Sjoerg return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
741*7330f729Sjoerg }
742*7330f729Sjoerg
743*7330f729Sjoerg static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ps(__m128 __A)744*7330f729Sjoerg _mm_frcz_ps(__m128 __A)
745*7330f729Sjoerg {
746*7330f729Sjoerg return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
747*7330f729Sjoerg }
748*7330f729Sjoerg
749*7330f729Sjoerg static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_pd(__m128d __A)750*7330f729Sjoerg _mm_frcz_pd(__m128d __A)
751*7330f729Sjoerg {
752*7330f729Sjoerg return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
753*7330f729Sjoerg }
754*7330f729Sjoerg
755*7330f729Sjoerg static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_frcz_ps(__m256 __A)756*7330f729Sjoerg _mm256_frcz_ps(__m256 __A)
757*7330f729Sjoerg {
758*7330f729Sjoerg return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
759*7330f729Sjoerg }
760*7330f729Sjoerg
761*7330f729Sjoerg static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_frcz_pd(__m256d __A)762*7330f729Sjoerg _mm256_frcz_pd(__m256d __A)
763*7330f729Sjoerg {
764*7330f729Sjoerg return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
765*7330f729Sjoerg }
766*7330f729Sjoerg
/* Remove this header's attribute helper macros so they do not remain defined
 * after the header has been included. */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS
769*7330f729Sjoerg
770*7330f729Sjoerg #endif /* __XOPINTRIN_H */
771