xref: /llvm-project/clang/lib/Headers/avx512fintrin.h (revision 1312369afbeb2083094b3d34a88c346b22e86971)
1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 #ifndef __IMMINTRIN_H
10 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11 #endif
12 
13 #ifndef __AVX512FINTRIN_H
14 #define __AVX512FINTRIN_H
15 
/* 512-bit internal vector types.  The __vN<t> names encode lane count and
 * element type: q=char, h=short, s=int/float lanes, d=double, di=long long;
 * e.g. __v16si = 16 x int, __v8df = 8 x double.  These are implementation
 * details and should not appear in user-visible interfaces. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v64qs __attribute__((__vector_size__(64)));

/* Public 512-bit vector types (64-byte aligned) and their unaligned (_u)
 * counterparts used by the loadu/storeu intrinsics. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Write-mask types: one bit per lane (8 lanes of 64 bits, 16 lanes of 32). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
/* Rounding mode macros.  The first four match the MXCSR RC-field encodings
 * and select static rounding for the {er} intrinsic forms;
 * _MM_FROUND_CUR_DIRECTION means "use the current MXCSR rounding mode". */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
/* Constants for integer comparison predicates.  The enumerator values are
 * the immediate encodings consumed by the VPCMP{U}{D,Q} instructions, so
 * their order must not change.  GE/GT are aliases of NLT/NLE. */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* Encoding 3 is reserved (false) */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
63 
64 typedef enum
65 {
66   _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
67   _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
68   _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
69   _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
70   _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
71   _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
72   _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
73   _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
74   _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
75   _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
76   _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
77   _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
78   _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
79   _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
80   _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
81   _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
82   _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
83   _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
84   _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
85   _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
86   _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
87   _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
88   _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
89   _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
90   _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
91   _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
92   _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
93   _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
94   _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
95   _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
96   _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
97   _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
98   _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
99   _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
100   _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
101   _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
102   _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
103   _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
104   _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
105   _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
106   _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
107   _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
108   _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
109   _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
110   _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
111   _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
112   _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
113   _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
114   _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
115   _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
116   _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
117   _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
118   _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
119   _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
120   _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
121   _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
122   _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
123   _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
124   _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
125   _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
126   _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
127   _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
128   _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
129   _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
130   _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
131   _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
132   _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
133   _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
134   _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
135   _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
136   _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
137   _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
138   _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
139   _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
140   _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
141   _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
142   _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
143   _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
144   _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
145   _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
146   _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
147   _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
148   _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
149   _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
150   _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
151   _MM_PERM_DDDD = 0xFF
152 } _MM_PERM_ENUM;
153 
/* Normalization-interval selector for the getmant intrinsics (bits [1:0]
 * of the immediate). */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign-control selector for the getmant intrinsics (bits [3:2] of the
 * immediate). */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,   /* sign = 0             */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
168 
/* Define the default attributes for the functions in this file.  The 512-bit
 * variants require the evex512 feature; the 128-bit and scalar/mask helpers
 * explicitly opt out of it ("no-evex512"). */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512f,no-evex512")))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
/* Without C++11 constexpr each *_CONSTEXPR macro must expand to exactly its
 * own non-constexpr counterpart.  (The previous definitions cross-wired the
 * plain and 128-bit variants, giving *_CONSTEXPR functions a spurious
 * 128-bit min-vector-width and stripping it from 128_CONSTEXPR ones.) */
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
187 
188 /* Create vectors with repeated elements */
189 
/// Return a 512-bit integer vector with all 8 quadword elements zeroed.
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setzero_si512(void) {
  return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0};
}

/* Compatibility alias used by some other toolchains. */
#define _mm512_setzero_epi32 _mm512_setzero_si512
196 
/// Return a 512-bit vector of [8 x double] with undefined (unspecified)
/// contents; cheaper than zeroing when the value will be fully overwritten.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

/// Return a 512-bit vector of [16 x float] with undefined contents.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/// Return a 512-bit vector of [16 x float] with undefined contents.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/// Return a 512-bit integer vector with undefined contents.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}
220 
/// Broadcast the low 32-bit element of __A to all 16 lanes (VPBROADCASTD).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

/// Merge-masked broadcast: lanes with a 0 mask bit keep the value from __O.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}

/// Zero-masked broadcast: lanes with a 0 mask bit are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) _mm512_setzero_si512());
}

/// Broadcast the low 64-bit element of __A to all 8 lanes (VPBROADCASTQ).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0);
}

/// Merge-masked 64-bit broadcast: unselected lanes come from __O.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) __O);

}

/// Zero-masked 64-bit broadcast: unselected lanes are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) _mm512_setzero_si512());
}
267 
/// Return a 512-bit vector of [16 x float] with all elements set to 0.0f.
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void) {
  return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                               0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
}

/* Compatibility alias. */
#define _mm512_setzero _mm512_setzero_ps

/// Return a 512-bit vector of [8 x double] with all elements set to 0.0.
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_setzero_pd(void) {
  return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
}
279 
/// Splat the scalar __w into all 16 float lanes.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_set1_ps(float __w)
{
  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                                 __w, __w, __w, __w, __w, __w, __w, __w  };
}

/// Splat the scalar __w into all 8 double lanes.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_set1_pd(double __w)
{
  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}

/// Splat the byte __w into all 64 lanes.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi8(char __w)
{
  return __extension__ (__m512i)(__v64qi){
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w  };
}

/// Splat the 16-bit value __w into all 32 lanes.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi16(short __w)
{
  return __extension__ (__m512i)(__v32hi){
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w };
}

/// Splat the 32-bit value __s into all 16 lanes.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi32(int __s)
{
  return __extension__ (__m512i)(__v16si){
    __s, __s, __s, __s, __s, __s, __s, __s,
    __s, __s, __s, __s, __s, __s, __s, __s };
}

/// Zero-masked 32-bit splat: lanes with a 0 bit in __M are zeroed.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si)_mm512_set1_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

/// Splat the 64-bit value __d into all 8 lanes.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi64(long long __d)
{
  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}

/// Zero-masked 64-bit splat: lanes with a 0 bit in __M are zeroed.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di)_mm512_set1_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
346 
/// Broadcast the low single-precision element of __A to all 16 lanes
/// (VBROADCASTSS).
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcastss_ps(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
353 
/// Repeat the 4-element pattern {__D,__C,__B,__A} across all 16 int lanes.
/// Arguments are listed high-to-low within each 128-bit chunk, so the
/// initializer order is reversed.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return __extension__ (__m512i)(__v16si)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/// Repeat the 4-element pattern across the 8 long long lanes.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
       long long __D)
{
  return __extension__ (__m512i) (__v8di)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/// Repeat the 4-element pattern across the 8 double lanes.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m512d)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/// Repeat the 4-element pattern across the 16 float lanes.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return __extension__ (__m512)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* setr4 variants take the arguments in memory (low-to-high) order and
 * forward to the set4 forms with the order reversed. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
396 
/// Broadcast the low double-precision element of __A to all 8 lanes
/// (VBROADCASTSD).
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcastsd_pd(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0);
}
403 
/* Cast between vector types.  Widening casts leave the upper bits undefined
 * (modeled with __builtin_nondeterministic_value); narrowing casts and
 * same-width reinterprets generate no code. */

/// Widen [4 x double] to [8 x double]; upper 256 bits are undefined.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
                                 1, 2, 3, 4, 5, 6, 7);
}

/// Widen [8 x float] to [16 x float]; upper 256 bits are undefined.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/// Truncate [8 x double] to its low [2 x double].
static __inline __m128d __DEFAULT_FN_ATTRS512
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

/// Truncate [8 x double] to its low [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS512
_mm512_castpd512_pd256 (__m512d __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
}

/// Truncate [16 x float] to its low [4 x float].
static __inline __m128 __DEFAULT_FN_ATTRS512
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}

/// Truncate [16 x float] to its low [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS512
_mm512_castps512_ps256 (__m512 __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
}

/// Reinterpret the bits of [8 x double] as [16 x float]; no code generated.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

/// Reinterpret the bits of [8 x double] as a 512-bit integer vector.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

/// Widen [2 x double] to [8 x double]; upper 384 bits are undefined.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_castpd128_pd512 (__m128d __A)
{
  /* Self-initialization with __builtin_nondeterministic_value is the
   * intended idiom for "freeze an undefined 256-bit value". */
  __m256d __B = __builtin_nondeterministic_value(__B);
  return __builtin_shufflevector(
      __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
      __B, 0, 1, 2, 3, 4, 5, 6, 7);
}

/// Reinterpret the bits of [16 x float] as [8 x double].
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

/// Reinterpret the bits of [16 x float] as a 512-bit integer vector.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

/// Widen [4 x float] to [16 x float]; upper 384 bits are undefined.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_castps128_ps512 (__m128 __A)
{
  __m256 __B = __builtin_nondeterministic_value(__B);
  return __builtin_shufflevector(
      __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
      __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/// Widen a 128-bit integer vector to 512 bits; upper 384 bits are undefined.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_castsi128_si512 (__m128i __A)
{
  __m256i __B = __builtin_nondeterministic_value(__B);
  return __builtin_shufflevector(
      __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
      __B, 0, 1, 2, 3, 4, 5, 6, 7);
}

/// Widen a 256-bit integer vector to 512 bits; upper 256 bits are undefined.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_castsi256_si512 (__m256i __A)
{
   return  __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
}

/// Reinterpret a 512-bit integer vector as [16 x float].
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

/// Reinterpret a 512-bit integer vector as [8 x double].
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

/// Truncate a 512-bit integer vector to its low 128 bits.
static __inline __m128i __DEFAULT_FN_ATTRS512
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
}

/// Truncate a 512-bit integer vector to its low 256 bits.
static __inline __m256i __DEFAULT_FN_ATTRS512
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}
524 
/// Convert an integer to a 16-bit write mask (simple truncation).
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_int2mask(int __a)
{
  return (__mmask16)__a;
}

/// Convert a 16-bit write mask to an int (zero-extension).
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask2int(__mmask16 __a)
{
  return (int)__a;
}
536 
/// Constructs a 512-bit floating-point vector of [8 x double] from a
///    128-bit floating-point vector of [2 x double]. The lower 128 bits
///    contain the value of the source vector. The upper 384 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_zextpd128_pd512(__m128d __a)
{
  /* Indices 2,3 select elements of the zero vector (second operand). */
  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
}

/// Constructs a 512-bit floating-point vector of [8 x double] from a
///    256-bit floating-point vector of [4 x double]. The lower 256 bits
///    contain the value of the source vector. The upper 256 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [4 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_zextpd256_pd512(__m256d __a)
{
  /* Indices 4..7 select the four zero elements of the second operand. */
  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
}

/// Constructs a 512-bit floating-point vector of [16 x float] from a
///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
///    the value of the source vector. The upper 384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_zextps128_ps512(__m128 __a)
{
  /* Indices 4..7 (repeated) select zero elements of the second operand. */
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
}

/// Constructs a 512-bit floating-point vector of [16 x float] from a
///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
///    the value of the source vector. The upper 256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [8 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_zextps256_ps512(__m256 __a)
{
  /* Indices 8..15 select the eight zero elements of the second operand. */
  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/// Constructs a 512-bit integer vector from a 128-bit integer vector.
///    The lower 128 bits contain the value of the source vector. The upper
///    384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit integer vector.
/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
///    the parameter. The upper 384 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_zextsi128_si512(__m128i __a)
{
  /* Indices 2,3 select elements of the zero vector (second operand). */
  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
}

/// Constructs a 512-bit integer vector from a 256-bit integer vector.
///    The lower 256 bits contain the value of the source vector. The upper
///    256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit integer vector.
/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
///    the parameter. The upper 256 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_zextsi256_si512(__m256i __a)
{
  /* Indices 4..7 select the four zero elements of the second operand. */
  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
}
646 
/* Bitwise operators.  The unsigned element types (__v16su/__v8du) are used
 * so the operations are plain lane-wise integer ops with no UB concerns. */

/// Bitwise AND of 32-bit lanes.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}

/// Merge-masked AND: lanes with a 0 mask bit keep the value from __src.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                (__v16si) _mm512_and_epi32(__a, __b),
                (__v16si) __src);
}

/// Zero-masked AND: lanes with a 0 mask bit are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

/// Bitwise AND of 64-bit lanes.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/// Merge-masked 64-bit AND: unselected lanes come from __src.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
                (__v8di) _mm512_and_epi64(__a, __b),
                (__v8di) __src);
}

/// Zero-masked 64-bit AND.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

/// Bitwise AND-NOT of the full 512-bit value: (~__A) & __B.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

/// Lane-wise 32-bit AND-NOT: (~__A) & __B.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)__A & (__v16su)__B);
}

/// Merge-masked 32-bit AND-NOT: unselected lanes come from __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_andnot_epi32(__A, __B),
                                         (__v16si)__W);
}

/// Zero-masked 32-bit AND-NOT.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

/// Lane-wise 64-bit AND-NOT: (~__A) & __B.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

/// Merge-masked 64-bit AND-NOT: unselected lanes come from __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_andnot_epi64(__A, __B),
                                          (__v8di)__W);
}

/// Zero-masked 64-bit AND-NOT.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
                                           __U, __A, __B);
}
737 
/// Bitwise OR of 32-bit lanes.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

/// Merge-masked 32-bit OR: unselected lanes come from __src.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}

/// Zero-masked 32-bit OR.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/// Bitwise OR of 64-bit lanes.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/// Merge-masked 64-bit OR: unselected lanes come from __src.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

/// Zero-masked 64-bit OR.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
776 }
777 
/* Bitwise XOR of two 512-bit vectors, treated as sixteen 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}

/* Merge-masking XOR: lane i is (__a ^ __b)[i] when bit i of __k is set,
 * otherwise __src[i]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                            (__v16si)_mm512_xor_epi32(__a, __b),
                                            (__v16si)__src);
}
791 
792 static __inline__ __m512i __DEFAULT_FN_ATTRS512
793 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
794 {
795   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
796 }
797 
/* Bitwise XOR of two 512-bit vectors, treated as eight 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

/* Merge-masking XOR: lane i is (__a ^ __b)[i] when bit i of __k is set,
 * otherwise __src[i]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}
811 
812 static __inline__ __m512i __DEFAULT_FN_ATTRS512
813 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
814 {
815   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
816 }
817 
/* Whole-register bitwise AND of two 512-bit integer vectors.  The element
 * width is irrelevant for bitwise ops; __v8du is just a convenient lane
 * type. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Whole-register bitwise OR of two 512-bit integer vectors. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Whole-register bitwise XOR of two 512-bit integer vectors. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
835 
836 /* Arithmetic */
837 
/* Lane-wise addition of eight double-precision elements. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_add_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a + (__v8df)__b);
}

/* Lane-wise addition of sixteen single-precision elements. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_add_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}

/* Lane-wise multiplication of eight double-precision elements. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mul_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a * (__v8df)__b);
}

/* Lane-wise multiplication of sixteen single-precision elements. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mul_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}

/* Lane-wise subtraction of eight double-precision elements. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_sub_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a - (__v8df)__b);
}

/* Lane-wise subtraction of sixteen single-precision elements. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_sub_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}
873 
/* Lane-wise 64-bit integer addition.  Unsigned lanes (__v8du) are used so
 * overflow wraps with well-defined C semantics (signed overflow is UB). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

/* Merge-masking add: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking add: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Lane-wise 64-bit integer subtraction (wraparound well-defined). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

/* Merge-masking subtract: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking subtract: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
917 
/* Lane-wise 32-bit integer addition.  Unsigned lanes (__v16su) make the
 * wraparound well-defined C (signed overflow would be UB). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

/* Merge-masking add: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking add: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Lane-wise 32-bit integer subtraction (wraparound well-defined). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

/* Merge-masking subtract: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masking subtract: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
961 
/* Lane-wise double-precision maximum; R is an _MM_FROUND_* control
 * constant (e.g. _MM_FROUND_NO_EXC). */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masking variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))

/* Zero-masking variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Lane-wise double maximum with the current MXCSR exception state. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_max_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
                                           _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking max: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masking max: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
998 
/* Lane-wise single-precision maximum; R is an _MM_FROUND_* control
 * constant (e.g. _MM_FROUND_NO_EXC). */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masking variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))

/* Zero-masking variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))

/* Lane-wise float maximum with the current MXCSR exception state. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_max_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
                                          _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking max: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masking max: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
1035 
/* Scalar single-precision max on the low lane (VMAXSS form); upper lanes
 * are taken from __A per the builtin's operand order.  Merge-masking: if
 * bit 0 of __U is clear the low lane comes from __W. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: if bit 0 of __U is clear the low lane is 0.0f. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked scalar max with explicit _MM_FROUND_* control R (mask -1 keeps
 * the result unconditionally). */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masking scalar max with explicit control R. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masking scalar max with explicit control R. */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1071 
/* Scalar double-precision max on the low lane (VMAXSD form); the upper
 * lane is taken from __A per the builtin's operand order.  Merge-masking:
 * if bit 0 of __U is clear the low lane comes from __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: if bit 0 of __U is clear the low lane is 0.0. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked scalar max with explicit _MM_FROUND_* control R (mask -1 keeps
 * the result unconditionally). */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masking scalar max with explicit control R. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masking scalar max with explicit control R. */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1107 
/* Lane-wise signed 32-bit maximum (signedness selected by the __v16si
 * cast fed to the generic elementwise builtin). */
static __inline __m512i
__DEFAULT_FN_ATTRS512
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
}

/* Merge-masking: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epi32(__A, __B),
                                            (__v16si)__W);
}

/* Zero-masking: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epi32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

/* Lane-wise unsigned 32-bit maximum (unsigned via the __v16su cast). */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epu32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
}

/* Merge-masking: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epu32(__A, __B),
                                            (__v16si)__W);
}

/* Zero-masking: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epu32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

/* Lane-wise signed 64-bit maximum. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
}

/* Merge-masking: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Lane-wise unsigned 64-bit maximum. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epu64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
}

/* Merge-masking: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epu64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epu64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
1196 
/* Lane-wise double-precision minimum; R is an _MM_FROUND_* control
 * constant (e.g. _MM_FROUND_NO_EXC). */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masking variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))

/* Zero-masking variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Lane-wise double minimum with the current MXCSR exception state. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_min_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
                                           _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking min: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_min_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masking min: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_min_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
1233 
/* Lane-wise single-precision minimum; R is an _MM_FROUND_* control
 * constant (e.g. _MM_FROUND_NO_EXC). */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masking variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))

/* Zero-masking variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))

/* Lane-wise float minimum with the current MXCSR exception state. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_min_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
                                          _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking min: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_min_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masking min: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_min_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
1270 
/* Scalar single-precision min on the low lane (VMINSS form); upper lanes
 * are taken from __A per the builtin's operand order.  Merge-masking: if
 * bit 0 of __U is clear the low lane comes from __W. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: if bit 0 of __U is clear the low lane is 0.0f. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked scalar min with explicit _MM_FROUND_* control R (mask -1 keeps
 * the result unconditionally). */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masking scalar min with explicit control R. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masking scalar min with explicit control R. */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1306 
/* Scalar double-precision min on the low lane (VMINSD form); the upper
 * lane is taken from __A per the builtin's operand order.  Merge-masking:
 * if bit 0 of __U is clear the low lane comes from __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking variant: if bit 0 of __U is clear the low lane is 0.0. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked scalar min with explicit _MM_FROUND_* control R (mask -1 keeps
 * the result unconditionally). */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masking scalar min with explicit control R. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masking scalar min with explicit control R. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1342 
/* Lane-wise signed 32-bit minimum (signedness selected by the __v16si
 * cast fed to the generic elementwise builtin). */
static __inline __m512i
__DEFAULT_FN_ATTRS512
_mm512_min_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
}

/* Merge-masking: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epi32(__A, __B),
                                            (__v16si)__W);
}

/* Zero-masking: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epi32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

/* Lane-wise unsigned 32-bit minimum (unsigned via the __v16su cast). */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epu32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
}

/* Merge-masking: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epu32(__A, __B),
                                            (__v16si)__W);
}

/* Zero-masking: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epu32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

/* Lane-wise signed 64-bit minimum. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
}

/* Merge-masking: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Lane-wise unsigned 64-bit minimum. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epu64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
}

/* Merge-masking: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epu64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masking: lanes with a clear bit in __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epu64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
1431 
/* VPMULDQ: sign-extend the low 32-bit element of each 64-bit lane of __X
 * and __Y and multiply, producing eight 64-bit products. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}

/* Merge-masking: 64-bit lanes with a clear bit in __M come from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masking: 64-bit lanes with a clear bit in __M are zeroed. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

/* VPMULUDQ: zero-extend the low 32-bit element of each 64-bit lane and
 * multiply, producing eight 64-bit products. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}

/* Merge-masking: 64-bit lanes with a clear bit in __M come from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masking: 64-bit lanes with a clear bit in __M are zeroed. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}
1475 
/* Lane-wise 32-bit multiply keeping the low 32 bits of each product.
 * Unsigned lanes make the wraparound well-defined C. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

/* Zero-masking low multiply: lanes with a clear bit in __M are zeroed. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Merge-masking low multiply: lanes with a clear bit in __M come from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)__W);
}
1497 
/* Lane-wise 64-bit multiply keeping the low 64 bits of each product.
 * Implemented as a plain C vector multiply on unsigned lanes (wraparound
 * well-defined).  NOTE(review): "mullox" is not a documented Intel
 * intrinsic name here — presumably a Clang extension; confirm against the
 * Intel Intrinsics Guide before relying on it in portable code. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

/* Merge-masking variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullox_epi64(__A, __B),
                                             (__v8di)__W);
}
1509 
/* Lane-wise double-precision square root; R is an _MM_FROUND_* rounding/
 * exception control constant. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))

/* Merge-masking variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W)))

/* Zero-masking variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd()))

/* Lane-wise double square root under the current MXCSR rounding mode. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_sqrt_pd(__m512d __A)
{
  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
                                           _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking sqrt: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_sqrt_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masking sqrt: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_sqrt_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
1545 
/* Square root of packed floats in A with explicit rounding control R. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))

/* Masked _mm512_sqrt_round_ps: lanes with a clear bit in U come from W. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)(__m512)(W)))

/* Zero-masked _mm512_sqrt_round_ps: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)_mm512_setzero_ps()))
1558 
/* Square root of packed floats in __A, using the current rounding mode. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_sqrt_ps(__m512 __A)
{
  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
                                          _MM_FROUND_CUR_DIRECTION);
}

/* Masked _mm512_sqrt_ps: lanes with a clear bit in __U are copied from __W. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_sqrt_ps(__A),
                                             (__v16sf)__W);
}

/* Zero-masked _mm512_sqrt_ps: lanes with a clear bit in __U are zeroed. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_sqrt_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
1581 
1582 static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1583 _mm512_rsqrt14_pd(__m512d __A)
1584 {
1585   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1586                  (__v8df)
1587                  _mm512_setzero_pd (),
1588                  (__mmask8) -1);}
1589 
/* Masked _mm512_rsqrt14_pd: lanes with a clear bit in __U are copied from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}

/* Zero-masked _mm512_rsqrt14_pd: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}
1606 
/* Approximate reciprocal square root of packed floats in __A (rsqrt14
 * approximation; per Intel, max relative error < 2^-14). */
static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_rsqrt14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                (__v16sf)
                _mm512_setzero_ps (),
                (__mmask16) -1);
}

/* Masked _mm512_rsqrt14_ps: lanes with a clear bit in __U are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}

/* Zero-masked _mm512_rsqrt14_ps: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}
1632 
/* Scalar rsqrt14: approximate reciprocal square root of the low float of __B;
 * upper elements of the result come from __A (per Intel's _mm_rsqrt14_ss). */
static  __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
             (__v4sf) __B,
             (__v4sf)
             _mm_setzero_ps (),
             (__mmask8) -1);
}

/* Masked scalar rsqrt14: low element comes from __W when bit 0 of __U is
 * clear; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U);
}

/* Zero-masked scalar rsqrt14: low element is zeroed when bit 0 of __U is
 * clear; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U);
}
1660 
/* Scalar rsqrt14: approximate reciprocal square root of the low double of
 * __B; the upper element of the result comes from __A. */
static  __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
              (__v2df) __B,
              (__v2df)
              _mm_setzero_pd (),
              (__mmask8) -1);
}

/* Masked scalar rsqrt14: low element comes from __W when bit 0 of __U is
 * clear; upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U);
}

/* Zero-masked scalar rsqrt14: low element is zeroed when bit 0 of __U is
 * clear; upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U);
}
1688 
/* Approximate reciprocal of packed doubles in __A (rcp14 approximation; per
 * Intel, max relative error < 2^-14). */
static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_rcp14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
               (__v8df)
               _mm512_setzero_pd (),
               (__mmask8) -1);
}

/* Masked _mm512_rcp14_pd: lanes with a clear bit in __U are copied from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masked _mm512_rcp14_pd: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U);
}
1714 
/* Approximate reciprocal of packed floats in __A (rcp14 approximation; per
 * Intel, max relative error < 2^-14). */
static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_rcp14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
              (__v16sf)
              _mm512_setzero_ps (),
              (__mmask16) -1);
}

/* Masked _mm512_rcp14_ps: lanes with a clear bit in __U are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked _mm512_rcp14_ps: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                   (__v16sf)
                   _mm512_setzero_ps (),
                   (__mmask16) __U);
}
1740 
/* Scalar rcp14: approximate reciprocal of the low float of __B; upper
 * elements of the result come from __A. */
static  __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf)
                 _mm_setzero_ps (),
                 (__mmask8) -1);
}

/* Masked scalar rcp14: low element comes from __W when bit 0 of __U is
 * clear; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U);
}

/* Zero-masked scalar rcp14: low element is zeroed when bit 0 of __U is
 * clear; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U);
}
1768 
/* Scalar rcp14: approximate reciprocal of the low double of __B; the upper
 * element of the result comes from __A. */
static  __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
            (__v2df) __B,
            (__v2df)
            _mm_setzero_pd (),
            (__mmask8) -1);
}

/* Masked scalar rcp14: low element comes from __W when bit 0 of __U is
 * clear; upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U);
}

/* Zero-masked scalar rcp14: low element is zeroed when bit 0 of __U is
 * clear; upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U);
}
1796 
1797 static __inline __m512 __DEFAULT_FN_ATTRS512
1798 _mm512_floor_ps(__m512 __A)
1799 {
1800   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1801                                                   _MM_FROUND_FLOOR,
1802                                                   (__v16sf) __A, (unsigned short)-1,
1803                                                   _MM_FROUND_CUR_DIRECTION);
1804 }
1805 
/* Masked floor of packed floats: lanes with a clear bit in __U are copied
 * from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                   _MM_FROUND_FLOOR,
                   (__v16sf) __W, __U,
                   _MM_FROUND_CUR_DIRECTION);
}
1814 
1815 static __inline __m512d __DEFAULT_FN_ATTRS512
1816 _mm512_floor_pd(__m512d __A)
1817 {
1818   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1819                                                    _MM_FROUND_FLOOR,
1820                                                    (__v8df) __A, (unsigned char)-1,
1821                                                    _MM_FROUND_CUR_DIRECTION);
1822 }
1823 
/* Masked floor of packed doubles: lanes with a clear bit in __U are copied
 * from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                _MM_FROUND_FLOOR,
                (__v8df) __W, __U,
                _MM_FROUND_CUR_DIRECTION);
}
1832 
/* Masked ceiling of packed floats: lanes with a clear bit in __U are copied
 * from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                   _MM_FROUND_CEIL,
                   (__v16sf) __W, __U,
                   _MM_FROUND_CUR_DIRECTION);
}
1841 
1842 static __inline __m512 __DEFAULT_FN_ATTRS512
1843 _mm512_ceil_ps(__m512 __A)
1844 {
1845   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1846                                                   _MM_FROUND_CEIL,
1847                                                   (__v16sf) __A, (unsigned short)-1,
1848                                                   _MM_FROUND_CUR_DIRECTION);
1849 }
1850 
1851 static __inline __m512d __DEFAULT_FN_ATTRS512
1852 _mm512_ceil_pd(__m512d __A)
1853 {
1854   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1855                                                    _MM_FROUND_CEIL,
1856                                                    (__v8df) __A, (unsigned char)-1,
1857                                                    _MM_FROUND_CUR_DIRECTION);
1858 }
1859 
/* Masked ceiling of packed doubles: lanes with a clear bit in __U are copied
 * from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                _MM_FROUND_CEIL,
                (__v8df) __W, __U,
                _MM_FROUND_CUR_DIRECTION);
}
1868 
/* Absolute value of packed 64-bit integers in __A. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_abs_epi64(__m512i __A)
{
  return (__m512i)__builtin_elementwise_abs((__v8di)__A);
}

/* Masked abs_epi64: lanes with a clear bit in __U are copied from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_abs_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masked abs_epi64: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_abs_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
1890 
/* Absolute value of packed 32-bit integers in __A. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_abs_epi32(__m512i __A)
{
  return (__m512i)__builtin_elementwise_abs((__v16si) __A);
}

/* Masked abs_epi32: lanes with a clear bit in __U are copied from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_abs_epi32(__A),
                                             (__v16si)__W);
}

/* Zero-masked abs_epi32: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                             (__v16si)_mm512_abs_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}
1912 
/* Masked scalar float add: low element is __A[0] + __B[0] when bit 0 of __U
 * is set, otherwise __W[0]; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_add_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

/* Zero-masked scalar float add: low element is zeroed when bit 0 of __U is
 * clear; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_add_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
1924 
/* Scalar float add with explicit rounding control R. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Masked _mm_add_round_ss: low element comes from W when bit 0 of U is clear. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked _mm_add_round_ss: low element is zeroed when bit 0 of U is clear. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1942 
/* Masked scalar double add: low element is __A[0] + __B[0] when bit 0 of __U
 * is set, otherwise __W[0]; the upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_add_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

/* Zero-masked scalar double add: low element is zeroed when bit 0 of __U is
 * clear; the upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_add_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
/* Scalar double add with explicit rounding control R. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Masked _mm_add_round_sd: low element comes from W when bit 0 of U is clear. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked _mm_add_round_sd: low element is zeroed when bit 0 of U is clear. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1971 
/* Masked packed-double add: lanes with a clear bit in __U are copied from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed-double add: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Masked packed-float add: lanes with a clear bit in __U are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed-float add: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
1999 
/* Packed-double add with explicit rounding control R. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Masked _mm512_add_round_pd: lanes with a clear bit in U come from W. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked _mm512_add_round_pd: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Packed-float add with explicit rounding control R. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Masked _mm512_add_round_ps: lanes with a clear bit in U come from W. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked _mm512_add_round_ps: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2027 
/* Masked scalar float subtract: low element is __A[0] - __B[0] when bit 0 of
 * __U is set, otherwise __W[0]; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_sub_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

/* Zero-masked scalar float subtract: low element is zeroed when bit 0 of __U
 * is clear; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_sub_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Scalar float subtract with explicit rounding control R. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Masked _mm_sub_round_ss: low element comes from W when bit 0 of U is clear. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked _mm_sub_round_ss: low element is zeroed when bit 0 of U is clear. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2056 
/* Masked scalar double subtract: low element is __A[0] - __B[0] when bit 0 of
 * __U is set, otherwise __W[0]; the upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_sub_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

/* Zero-masked scalar double subtract: low element is zeroed when bit 0 of __U
 * is clear; the upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_sub_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
2068 
/* Scalar double subtract with explicit rounding control R. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Masked _mm_sub_round_sd: low element comes from W when bit 0 of U is clear. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked _mm_sub_round_sd: low element is zeroed when bit 0 of U is clear. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2086 
/* Masked packed-double subtract: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed-double subtract: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Masked packed-float subtract: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed-float subtract: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2114 
/* Packed-double subtract with explicit rounding control R. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Masked _mm512_sub_round_pd: lanes with a clear bit in U come from W. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked _mm512_sub_round_pd: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Packed-float subtract with explicit rounding control R. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Masked _mm512_sub_round_ps: lanes with a clear bit in U come from W. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked _mm512_sub_round_ps: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2142 
/* Masked scalar float multiply: low element is __A[0] * __B[0] when bit 0 of
 * __U is set, otherwise __W[0]; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, __W);
}

/* Zero-masked scalar float multiply: low element is zeroed when bit 0 of __U
 * is clear; upper elements come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  __A = _mm_mul_ss(__A, __B);
  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
/* Scalar float multiply with explicit rounding control R. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Masked _mm_mul_round_ss: low element comes from W when bit 0 of U is clear. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked _mm_mul_round_ss: low element is zeroed when bit 0 of U is clear. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2171 
/* Masked scalar double multiply: low element is __A[0] * __B[0] when bit 0 of
 * __U is set, otherwise __W[0]; the upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, __W);
}

/* Zero-masked scalar double multiply: low element is zeroed when bit 0 of __U
 * is clear; the upper element comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  __A = _mm_mul_sd(__A, __B);
  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
2183 
/* Scalar double multiply with explicit rounding control R. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Masked _mm_mul_round_sd: low element comes from W when bit 0 of U is clear. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked _mm_mul_round_sd: low element is zeroed when bit 0 of U is clear. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2201 
/* Masked packed-double multiply: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed-double multiply: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Masked packed-float multiply: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed-float multiply: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2229 
/* Packed-double multiply with explicit rounding control R. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Masked _mm512_mul_round_pd: lanes with a clear bit in U come from W. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked _mm512_mul_round_pd: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Packed-float multiply with explicit rounding control R. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R)))

/* Masked _mm512_mul_round_ps: lanes with a clear bit in U come from W. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked _mm512_mul_round_ps: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2257 
2258 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2259 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2260   __A = _mm_div_ss(__A, __B);
2261   return __builtin_ia32_selectss_128(__U, __A, __W);
2262 }
2263 
2264 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2265 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2266   __A = _mm_div_ss(__A, __B);
2267   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2268 }
2269 
/* Scalar single-precision divide of the low elements of A and B with
 * rounding control R; the mask/passthrough arguments to the builtin
 * express the unmasked (-1), merge-masked (W), and zero-masked
 * (_mm_setzero_ps) forms. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, element 0 comes from W. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked: if bit 0 of U is clear, element 0 becomes 0.0f. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2287 
2288 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2289 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2290   __A = _mm_div_sd(__A, __B);
2291   return __builtin_ia32_selectsd_128(__U, __A, __W);
2292 }
2293 
2294 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2295 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2296   __A = _mm_div_sd(__A, __B);
2297   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2298 }
2299 
/* Scalar double-precision divide of the low elements of A and B with
 * rounding control R; unmasked, merge-masked, and zero-masked forms
 * mirror the _ss macros above. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of U is clear, element 0 comes from W. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: if bit 0 of U is clear, element 0 becomes 0.0. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2317 
2318 static __inline __m512d __DEFAULT_FN_ATTRS512
2319 _mm512_div_pd(__m512d __a, __m512d __b)
2320 {
2321   return (__m512d)((__v8df)__a/(__v8df)__b);
2322 }
2323 
2324 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2325 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2326   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2327                                               (__v8df)_mm512_div_pd(__A, __B),
2328                                               (__v8df)__W);
2329 }
2330 
2331 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2332 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2333   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2334                                               (__v8df)_mm512_div_pd(__A, __B),
2335                                               (__v8df)_mm512_setzero_pd());
2336 }
2337 
2338 static __inline __m512 __DEFAULT_FN_ATTRS512
2339 _mm512_div_ps(__m512 __a, __m512 __b)
2340 {
2341   return (__m512)((__v16sf)__a/(__v16sf)__b);
2342 }
2343 
2344 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2345 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2346   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2347                                              (__v16sf)_mm512_div_ps(__A, __B),
2348                                              (__v16sf)__W);
2349 }
2350 
2351 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2352 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2353   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2354                                              (__v16sf)_mm512_div_ps(__A, __B),
2355                                              (__v16sf)_mm512_setzero_ps());
2356 }
2357 
/* Packed divide with an explicit rounding control R, following the same
 * unmasked / merge-masked / zero-masked pattern as the mul_round macros. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Single-precision counterpart (16 lanes). */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked variant: lanes with a clear bit in U come from W. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked variant: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2385 
/* VRNDSCALEPS/PD: round each element to a fixed number of fraction bits.
 * The 8-bit immediate encodes the operation (per the Intel ISA: imm[7:4]
 * is the fraction-bit count, imm[3:0] the rounding/exception control).
 * The non-_round forms use the current MXCSR direction for SAE purposes;
 * _round forms pass R through explicitly.  Unmasked forms pass an
 * undefined vector and an all-ones mask to the builtin. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: lanes with a clear bit in B come from A. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION))

/* Zero-masked: lanes with a clear bit in A are zeroed. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

/* _round variants: R supplies the rounding/SAE control explicitly. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

/* Double-precision counterparts (8 lanes, __mmask8). */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: lanes with a clear bit in B come from A. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

/* Zero-masked: lanes with a clear bit in A are zeroed. */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

/* _round variants: R supplies the rounding/SAE control explicitly. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2449 
/* Fused multiply-add family with explicit rounding, double precision.
 * All forms lower to the single vfmaddpd512 builtin; fmsub/fnmadd/fnmsub
 * are obtained by negating operands at the source level:
 *   fmadd  =  (A*B) + C
 *   fmsub  =  (A*B) - C      (negate C)
 *   fnmadd = -(A*B) + C      (negate A; equivalent to negating B)
 *   fnmsub = -(A*B) - C      (negate A and C)
 * _mask merges into A, _mask3 merges into C, _maskz zeroes masked lanes. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


/* (A*B) - C: C is negated before the fused add. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


/* -(A*B) + C: the product is negated by negating A. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


/* -(A*B) - C: both the product (via A) and C are negated. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2532 
2533 
/* (__A * __B) + __C on eight doubles, fused (single rounding step) in
   the current MXCSR rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FMA: lanes with a clear bit in __U keep the value of __A
   (the _mask builtin merges into its first operand). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FMA: lanes with a clear bit in __U keep the value of __C
   (the _mask3 builtin merges into its third operand). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked FMA: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
2573 
/* (__A * __B) - __C, fused: the subtraction is expressed by negating
   __C before the vfmaddpd builtin. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FMS: lanes with a clear bit in __U keep the value of __A. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked FMS: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
                                                     -(__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
2603 
/* -(__A * __B) + __C, fused: the product is negated by negating __B
   (the masked variants below negate __A instead; both are equivalent). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    -(__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FNMA: lanes with a clear bit in __U keep the value of
   __C (_mask3 merges into its third operand). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked FNMA: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
2633 
/* -(__A * __B) - __C, fused: both the product (via one factor) and __C
   are negated before the vfmaddpd builtin. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    -(__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked FNMS: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     -(__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
2653 
/* Single-precision FMA family with explicit rounding; same operand
 * negation scheme as the _pd macros above (fmsub negates C, fnmadd
 * negates A, fnmsub negates A and C).  _mask merges into A, _mask3
 * merges into C, _maskz zeroes masked lanes. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


/* (A*B) - C */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


/* -(A*B) + C */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


/* -(A*B) - C */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2736 
2737 
/* (__A * __B) + __C on sixteen floats, fused (single rounding step) in
   the current MXCSR rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FMA: lanes with a clear bit in __U keep the value of __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FMA: lanes with a clear bit in __U keep the value of __C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked FMA: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
2777 
/* (__A * __B) - __C, fused: the subtraction is expressed by negating
   __C before the vfmaddps builtin. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FMS: lanes with a clear bit in __U keep the value of __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked FMS: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    -(__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
2807 
/* -(__A * __B) + __C, fused: the product is negated by negating __B
   (the masked variants negate __A instead; both are equivalent). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   -(__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked FNMA: lanes with a clear bit in __U keep the value of __C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked FNMA: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
2837 
/* -(__A * __B) - __C, fused: both the product (via one factor) and __C
   are negated before the vfmaddps builtin. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   -(__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked FNMS: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    -(__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
2857 
/* Fused multiply with alternating add/subtract of C across lanes
 * (VFMADDSUB), packed double precision, with an explicit rounding mode R.
 * R must be a compile-time _MM_FROUND_* constant.  Unmasked form: all-ones
 * write mask. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


/* Merge-masked: lanes with a clear bit in U keep A (first source, per the
 * _mask builtin convention). */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


/* mask3 form: unselected lanes keep C (third source, per the _mask3
 * builtin convention). */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


/* Zero-masked: unselected lanes are zeroed. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


/* fmsubadd is implemented as fmaddsub with C negated, which flips which
 * lanes add and which subtract. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


/* Merge-masked fmsubadd: unselected lanes keep A. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


/* Zero-masked fmsubadd: unselected lanes are zeroed. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2905 
2906 
/// Packed-double FMA with alternating add/subtract of __C across lanes
/// (VFMADDSUB), using the current rounding direction. Unmasked.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                      (__v8df) __B,
                                                      (__v8df) __C,
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/// Merge-masked fmaddsub: lanes with a clear bit in __U keep __A.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                      (__v8df) __B,
                                                      (__v8df) __C,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/// mask3 fmaddsub: unselected lanes keep __C (third source, per the
/// _mask3 builtin convention; note the mask is the last parameter).
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/// Zero-masked fmaddsub: unselected lanes are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/// fmsubadd: fmaddsub with __C negated, flipping the add/subtract lane
/// pattern. Unmasked.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       -(__v8df) __C,
                                                       (__mmask8) -1,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/// Merge-masked fmsubadd: unselected lanes keep __A.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       -(__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/// Zero-masked fmsubadd: unselected lanes are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
                                                        (__v8df) __B,
                                                        -(__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}
2976 
/* Single-precision counterparts of the fmaddsub/fmsubadd round macros
 * above: 16 float lanes, __mmask16 write mask, explicit rounding mode R
 * (compile-time _MM_FROUND_* constant). */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


/* Merge-masked: unselected lanes keep A. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


/* mask3 form: unselected lanes keep C. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


/* Zero-masked: unselected lanes are zeroed. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


/* fmsubadd = fmaddsub with C negated. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


/* Merge-masked fmsubadd: unselected lanes keep A. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


/* Zero-masked fmsubadd: unselected lanes are zeroed. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
3024 
3025 
/// Packed-float FMA with alternating add/subtract of __C across lanes
/// (VFMADDSUB), using the current rounding direction. Unmasked.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/// Merge-masked fmaddsub: lanes with a clear bit in __U keep __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/// mask3 fmaddsub: unselected lanes keep __C (mask is the last parameter).
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/// Zero-masked fmaddsub: unselected lanes are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/// fmsubadd: fmaddsub with __C negated, flipping the add/subtract lane
/// pattern. Unmasked.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/// Merge-masked fmsubadd: unselected lanes keep __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/// Zero-masked fmsubadd: unselected lanes are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       -(__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
3095 
/* mask3 fused multiply-subtract, packed double, explicit rounding:
 * (A*B)-C with unselected lanes keeping C. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


/// mask3 fmsub, packed double, current rounding direction: (__A*__B)-__C,
/// unselected lanes keep __C.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* mask3 fused multiply-subtract, packed single, explicit rounding. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

/// mask3 fmsub, packed single, current rounding direction.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* mask3 fused multiply, alternating subtract/add (VFMSUBADD), packed
 * double, explicit rounding. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


/// mask3 fmsubadd, packed double, current rounding direction; unselected
/// lanes keep __C.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* mask3 fmsubadd, packed single, explicit rounding. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


/// mask3 fmsubadd, packed single, current rounding direction.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}
3162 
/* Merge-masked fnmadd, packed double, explicit rounding:
 * -(A*B)+C expressed as fma(A, -B, C); unselected lanes keep A. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


/// Merge-masked fnmadd, packed double, current rounding direction:
/// -(__A*__B)+__C; unselected lanes keep __A.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    -(__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fnmadd, packed single, explicit rounding. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


/// Merge-masked fnmadd, packed single, current rounding direction.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   -(__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fnmsub, packed double, explicit rounding:
 * -(A*B)-C expressed as fma(A, -B, -C); unselected lanes keep A. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


/* mask3 fnmsub, packed double, explicit rounding: here spelled
 * fmsub(-A, B, C) == -(A*B)-C; unselected lanes keep C. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


/// Merge-masked fnmsub, packed double, current rounding direction:
/// -(__A*__B)-__C; unselected lanes keep __A.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    -(__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/// mask3 fnmsub, packed double: fmsub(-__A, __B, __C) == -(__A*__B)-__C;
/// unselected lanes keep __C.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fnmsub, packed single, explicit rounding. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


/* mask3 fnmsub, packed single, explicit rounding. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


/// Merge-masked fnmsub, packed single, current rounding direction.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   -(__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/// mask3 fnmsub, packed single, current rounding direction; unselected
/// lanes keep __C.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
3264 
3265 
3266 
3267 /* Vector permutations */
3268 
/// Two-source permute of 32-bit elements: each lane of the result is an
/// element picked from the concatenation of __A and __B, selected by the
/// corresponding index in __I (VPERMI2D).
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
                                                (__v16si) __B);
}

/// Merge-masked permutex2var: lanes with a clear bit in __U keep __A.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
                               __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)__A);
}

/// mask2 permutex2var: lanes with a clear bit in __U keep __I (the index
/// operand) instead of __A.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)__I);
}

/// Zero-masked permutex2var: lanes with a clear bit in __U are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)_mm512_setzero_si512());
}

/// Two-source permute of 64-bit elements (VPERMI2Q); see the epi32 variant
/// above for the selection semantics.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
                                                (__v8di) __B);
}

/// Merge-masked 64-bit permutex2var: unselected lanes keep __A.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
                               __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                               (__v8di)__A);
}

/// mask2 64-bit permutex2var: unselected lanes keep __I.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                               (__v8di)__I);
}

/// Zero-masked 64-bit permutex2var: unselected lanes are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                               (__v8di)_mm512_setzero_si512());
}
3336 
/* Concatenate A:B and shift right by I 64-bit elements (VALIGNQ).
 * I must be a compile-time constant. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

/* Merge-masked alignr: unselected lanes keep W. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))

/* Zero-masked alignr: unselected lanes are zeroed. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))

/* Concatenate A:B and shift right by I 32-bit elements (VALIGND). */
#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

/* Merge-masked 32-bit alignr: unselected lanes keep W. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

/* Zero-masked 32-bit alignr: unselected lanes are zeroed. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
3364 /* Vector Extract */
3365 
/* Extract one 256-bit group of four doubles from A, selected by the
 * compile-time immediate I; the unmasked form passes an undefined
 * pass-through vector since no lanes are masked off. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

/* Merge-masked extract: unselected result lanes keep W. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

/* Zero-masked extract: unselected result lanes are zeroed. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

/* Extract one 128-bit group of four floats from A, selected by the
 * compile-time immediate I. */
#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_undefined_ps(), \
                                            (__mmask8)-1))

/* Merge-masked 128-bit extract: unselected lanes keep W. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

/* Zero-masked 128-bit extract: unselected lanes are zeroed. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3395 
3396 /* Vector Blend */
3397 
3398 static __inline __m512d __DEFAULT_FN_ATTRS512
3399 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3400 {
3401   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3402                  (__v8df) __W,
3403                  (__v8df) __A);
3404 }
3405 
3406 static __inline __m512 __DEFAULT_FN_ATTRS512
3407 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3408 {
3409   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3410                 (__v16sf) __W,
3411                 (__v16sf) __A);
3412 }
3413 
3414 static __inline __m512i __DEFAULT_FN_ATTRS512
3415 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3416 {
3417   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3418                 (__v8di) __W,
3419                 (__v8di) __A);
3420 }
3421 
3422 static __inline __m512i __DEFAULT_FN_ATTRS512
3423 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3424 {
3425   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3426                 (__v16si) __W,
3427                 (__v16si) __A);
3428 }
3429 
3430 /* Compare */
3431 
/* Compare packed floats with predicate P (a _CMP_* constant) under an
 * explicit rounding/SAE mode R, producing a 16-bit lane mask. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

/* Same, but AND the result with the incoming mask U (zeroed lanes where
 * U is clear). */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

/* Current-direction wrappers around the _round_ forms. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Convenience wrappers fixing the predicate; the _OQ/_OS predicates are
 * ordered (false on NaN), _UQ/_US are unordered (true on NaN). */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

/* Double-precision counterparts: 8 lanes, __mmask8 result. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3541 
3542 /* Conversion */
3543 
/* Truncating conversion of 16 packed floats in A to unsigned 32-bit
 * integers, with explicit exception control R.  Unmasked (passthrough left
 * undefined), write-masked (passthrough W), and zero-masked forms. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
3558 
3559 
3560 static __inline __m512i __DEFAULT_FN_ATTRS512
3561 _mm512_cvttps_epu32(__m512 __A)
3562 {
3563   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3564                   (__v16si)
3565                   _mm512_setzero_si512 (),
3566                   (__mmask16) -1,
3567                   _MM_FROUND_CUR_DIRECTION);
3568 }
3569 
3570 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3571 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3572 {
3573   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3574                    (__v16si) __W,
3575                    (__mmask16) __U,
3576                    _MM_FROUND_CUR_DIRECTION);
3577 }
3578 
3579 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3580 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3581 {
3582   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3583                    (__v16si) _mm512_setzero_si512 (),
3584                    (__mmask16) __U,
3585                    _MM_FROUND_CUR_DIRECTION);
3586 }
3587 
/* Convert 16 packed signed 32-bit integers in A to floats under rounding
 * control R.  Unmasked / write-masked (W) / zero-masked forms. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
3602 
/* Convert 16 packed unsigned 32-bit integers in A to floats under rounding
 * control R.  Unmasked / write-masked (W) / zero-masked forms. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3617 
3618 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3619 _mm512_cvtepu32_ps (__m512i __A)
3620 {
3621   return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3622 }
3623 
3624 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3625 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3626 {
3627   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3628                                              (__v16sf)_mm512_cvtepu32_ps(__A),
3629                                              (__v16sf)__W);
3630 }
3631 
3632 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3633 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3634 {
3635   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3636                                              (__v16sf)_mm512_cvtepu32_ps(__A),
3637                                              (__v16sf)_mm512_setzero_ps());
3638 }
3639 
3640 static __inline __m512d __DEFAULT_FN_ATTRS512
3641 _mm512_cvtepi32_pd(__m256i __A)
3642 {
3643   return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3644 }
3645 
3646 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3647 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3648 {
3649   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3650                                               (__v8df)_mm512_cvtepi32_pd(__A),
3651                                               (__v8df)__W);
3652 }
3653 
3654 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3655 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3656 {
3657   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3658                                               (__v8df)_mm512_cvtepi32_pd(__A),
3659                                               (__v8df)_mm512_setzero_pd());
3660 }
3661 
3662 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3663 _mm512_cvtepi32lo_pd(__m512i __A)
3664 {
3665   return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3666 }
3667 
3668 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3669 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3670 {
3671   return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3672 }
3673 
3674 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3675 _mm512_cvtepi32_ps (__m512i __A)
3676 {
3677   return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3678 }
3679 
3680 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3681 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3682 {
3683   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3684                                              (__v16sf)_mm512_cvtepi32_ps(__A),
3685                                              (__v16sf)__W);
3686 }
3687 
3688 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3689 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3690 {
3691   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3692                                              (__v16sf)_mm512_cvtepi32_ps(__A),
3693                                              (__v16sf)_mm512_setzero_ps());
3694 }
3695 
3696 static __inline __m512d __DEFAULT_FN_ATTRS512
3697 _mm512_cvtepu32_pd(__m256i __A)
3698 {
3699   return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3700 }
3701 
3702 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3703 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3704 {
3705   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3706                                               (__v8df)_mm512_cvtepu32_pd(__A),
3707                                               (__v8df)__W);
3708 }
3709 
3710 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3711 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3712 {
3713   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3714                                               (__v8df)_mm512_cvtepu32_pd(__A),
3715                                               (__v8df)_mm512_setzero_pd());
3716 }
3717 
3718 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3719 _mm512_cvtepu32lo_pd(__m512i __A)
3720 {
3721   return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3722 }
3723 
3724 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3725 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3726 {
3727   return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3728 }
3729 
/* Convert 8 packed doubles in A to 8 floats (256-bit result) under rounding
 * control R.  Unmasked / write-masked (W) / zero-masked forms. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
3744 
3745 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3746 _mm512_cvtpd_ps (__m512d __A)
3747 {
3748   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3749                 (__v8sf) _mm256_undefined_ps (),
3750                 (__mmask8) -1,
3751                 _MM_FROUND_CUR_DIRECTION);
3752 }
3753 
3754 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3755 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3756 {
3757   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3758                 (__v8sf) __W,
3759                 (__mmask8) __U,
3760                 _MM_FROUND_CUR_DIRECTION);
3761 }
3762 
3763 static __inline__ __m256 __DEFAULT_FN_ATTRS512
3764 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3765 {
3766   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3767                 (__v8sf) _mm256_setzero_ps (),
3768                 (__mmask8) __U,
3769                 _MM_FROUND_CUR_DIRECTION);
3770 }
3771 
3772 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3773 _mm512_cvtpd_pslo (__m512d __A)
3774 {
3775   return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3776                 (__v8sf) _mm256_setzero_ps (),
3777                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3778 }
3779 
3780 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3781 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3782 {
3783   return (__m512) __builtin_shufflevector (
3784                 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3785                                                __U, __A),
3786                 (__v8sf) _mm256_setzero_ps (),
3787                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3788 }
3789 
/* Convert 16 packed floats in A to 16-bit half-precision values (256-bit
 * result) with rounding-control immediate I.
 * NOTE: in the mask/maskz forms the parameter names are historical — in
 * _mm512_mask_cvt_roundps_ph the first argument (named U) is the passthrough
 * vector and the second (named W) is the write mask, as the casts below
 * show; arguments are consumed positionally. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(U), \
                                             (__mmask16)(W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)))

/* The non-"round" names are direct aliases; they take the same arguments. */
#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3808 
/* Convert 16 packed half-precision values in A (16-bit lanes of a 256-bit
 * vector) to floats, with exception control R.  Unmasked / write-masked (W) /
 * zero-masked forms. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3823 
3824 
3825 static  __inline __m512 __DEFAULT_FN_ATTRS512
3826 _mm512_cvtph_ps(__m256i __A)
3827 {
3828   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3829                 (__v16sf)
3830                 _mm512_setzero_ps (),
3831                 (__mmask16) -1,
3832                 _MM_FROUND_CUR_DIRECTION);
3833 }
3834 
3835 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3836 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3837 {
3838   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3839                  (__v16sf) __W,
3840                  (__mmask16) __U,
3841                  _MM_FROUND_CUR_DIRECTION);
3842 }
3843 
3844 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3845 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3846 {
3847   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3848                  (__v16sf) _mm512_setzero_ps (),
3849                  (__mmask16) __U,
3850                  _MM_FROUND_CUR_DIRECTION);
3851 }
3852 
/* Truncating conversion of 8 packed doubles in A to signed 32-bit integers
 * (256-bit result), with exception control R.  Unmasked / write-masked (W) /
 * zero-masked forms. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3867 
3868 static __inline __m256i __DEFAULT_FN_ATTRS512
3869 _mm512_cvttpd_epi32(__m512d __a)
3870 {
3871   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3872                                                    (__v8si)_mm256_setzero_si256(),
3873                                                    (__mmask8) -1,
3874                                                     _MM_FROUND_CUR_DIRECTION);
3875 }
3876 
3877 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3878 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3879 {
3880   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3881                   (__v8si) __W,
3882                   (__mmask8) __U,
3883                   _MM_FROUND_CUR_DIRECTION);
3884 }
3885 
3886 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3887 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3888 {
3889   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3890                   (__v8si) _mm256_setzero_si256 (),
3891                   (__mmask8) __U,
3892                   _MM_FROUND_CUR_DIRECTION);
3893 }
3894 
/* Truncating conversion of 16 packed floats in A to signed 32-bit integers,
 * with exception control R.  Unmasked / write-masked (W) / zero-masked. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3909 
3910 static __inline __m512i __DEFAULT_FN_ATTRS512
3911 _mm512_cvttps_epi32(__m512 __a)
3912 {
3913   return (__m512i)
3914     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3915                                      (__v16si) _mm512_setzero_si512 (),
3916                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3917 }
3918 
3919 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3920 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3921 {
3922   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3923                   (__v16si) __W,
3924                   (__mmask16) __U,
3925                   _MM_FROUND_CUR_DIRECTION);
3926 }
3927 
3928 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3929 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3930 {
3931   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3932                   (__v16si) _mm512_setzero_si512 (),
3933                   (__mmask16) __U,
3934                   _MM_FROUND_CUR_DIRECTION);
3935 }
3936 
/* Convert 16 packed floats in A to signed 32-bit integers under rounding
 * control R.  Unmasked / write-masked (W) / zero-masked forms. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3951 
3952 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3953 _mm512_cvtps_epi32 (__m512 __A)
3954 {
3955   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3956                  (__v16si) _mm512_undefined_epi32 (),
3957                  (__mmask16) -1,
3958                  _MM_FROUND_CUR_DIRECTION);
3959 }
3960 
3961 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3962 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3963 {
3964   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3965                  (__v16si) __W,
3966                  (__mmask16) __U,
3967                  _MM_FROUND_CUR_DIRECTION);
3968 }
3969 
3970 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3971 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3972 {
3973   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3974                  (__v16si)
3975                  _mm512_setzero_si512 (),
3976                  (__mmask16) __U,
3977                  _MM_FROUND_CUR_DIRECTION);
3978 }
3979 
/* Convert 8 packed doubles in A to signed 32-bit integers (256-bit result)
 * under rounding control R.  Unmasked / write-masked (W) / zero-masked. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3994 
3995 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3996 _mm512_cvtpd_epi32 (__m512d __A)
3997 {
3998   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3999                  (__v8si)
4000                  _mm256_undefined_si256 (),
4001                  (__mmask8) -1,
4002                  _MM_FROUND_CUR_DIRECTION);
4003 }
4004 
4005 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4006 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4007 {
4008   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4009                  (__v8si) __W,
4010                  (__mmask8) __U,
4011                  _MM_FROUND_CUR_DIRECTION);
4012 }
4013 
4014 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4015 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4016 {
4017   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4018                  (__v8si)
4019                  _mm256_setzero_si256 (),
4020                  (__mmask8) __U,
4021                  _MM_FROUND_CUR_DIRECTION);
4022 }
4023 
/* Convert 16 packed floats in A to unsigned 32-bit integers under rounding
 * control R.  Unmasked / write-masked (W) / zero-masked forms. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
4038 
4039 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4040 _mm512_cvtps_epu32 ( __m512 __A)
4041 {
4042   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4043                   (__v16si)\
4044                   _mm512_undefined_epi32 (),
4045                   (__mmask16) -1,\
4046                   _MM_FROUND_CUR_DIRECTION);
4047 }
4048 
4049 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4050 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4051 {
4052   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4053                   (__v16si) __W,
4054                   (__mmask16) __U,
4055                   _MM_FROUND_CUR_DIRECTION);
4056 }
4057 
4058 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4059 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4060 {
4061   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4062                   (__v16si)
4063                   _mm512_setzero_si512 (),
4064                   (__mmask16) __U ,
4065                   _MM_FROUND_CUR_DIRECTION);
4066 }
4067 
/* Convert 8 packed doubles in A to unsigned 32-bit integers (256-bit result)
 * under rounding control R.  Unmasked / write-masked (W) / zero-masked. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
4082 
4083 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4084 _mm512_cvtpd_epu32 (__m512d __A)
4085 {
4086   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4087                   (__v8si)
4088                   _mm256_undefined_si256 (),
4089                   (__mmask8) -1,
4090                   _MM_FROUND_CUR_DIRECTION);
4091 }
4092 
4093 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4094 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4095 {
4096   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4097                   (__v8si) __W,
4098                   (__mmask8) __U,
4099                   _MM_FROUND_CUR_DIRECTION);
4100 }
4101 
4102 static __inline__ __m256i __DEFAULT_FN_ATTRS512
4103 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4104 {
4105   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4106                   (__v8si)
4107                   _mm256_setzero_si256 (),
4108                   (__mmask8) __U,
4109                   _MM_FROUND_CUR_DIRECTION);
4110 }
4111 
4112 static __inline__ double __DEFAULT_FN_ATTRS512
4113 _mm512_cvtsd_f64(__m512d __a)
4114 {
4115   return __a[0];
4116 }
4117 
4118 static __inline__ float __DEFAULT_FN_ATTRS512
4119 _mm512_cvtss_f32(__m512 __a)
4120 {
4121   return __a[0];
4122 }
4123 
4124 /* Unpack and Interleave */
4125 
4126 static __inline __m512d __DEFAULT_FN_ATTRS512
4127 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
4128 {
4129   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4130                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4131 }
4132 
4133 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4134 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4135 {
4136   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4137                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
4138                                            (__v8df)__W);
4139 }
4140 
4141 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4142 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4143 {
4144   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4145                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
4146                                            (__v8df)_mm512_setzero_pd());
4147 }
4148 
4149 static __inline __m512d __DEFAULT_FN_ATTRS512
4150 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
4151 {
4152   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4153                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4154 }
4155 
4156 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4157 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4158 {
4159   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4160                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
4161                                            (__v8df)__W);
4162 }
4163 
4164 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4165 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4166 {
4167   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4168                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
4169                                            (__v8df)_mm512_setzero_pd());
4170 }
4171 
4172 static __inline __m512 __DEFAULT_FN_ATTRS512
4173 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
4174 {
4175   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4176                                          2,    18,    3,    19,
4177                                          2+4,  18+4,  3+4,  19+4,
4178                                          2+8,  18+8,  3+8,  19+8,
4179                                          2+12, 18+12, 3+12, 19+12);
4180 }
4181 
4182 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4183 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4184 {
4185   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4186                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
4187                                           (__v16sf)__W);
4188 }
4189 
4190 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4191 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4192 {
4193   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4194                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
4195                                           (__v16sf)_mm512_setzero_ps());
4196 }
4197 
4198 static __inline __m512 __DEFAULT_FN_ATTRS512
4199 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
4200 {
4201   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4202                                          0,    16,    1,    17,
4203                                          0+4,  16+4,  1+4,  17+4,
4204                                          0+8,  16+8,  1+8,  17+8,
4205                                          0+12, 16+12, 1+12, 17+12);
4206 }
4207 
4208 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4209 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4210 {
4211   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4212                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
4213                                           (__v16sf)__W);
4214 }
4215 
4216 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4217 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4218 {
4219   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4220                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
4221                                           (__v16sf)_mm512_setzero_ps());
4222 }
4223 
/* Interleave the high pairs of 32-bit elements within each 128-bit lane of
 * __A and __B (vpunpckhdq): per lane, elements 2,3 of each source. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}
4249 
/* Interleave the low pairs of 32-bit elements within each 128-bit lane of
 * __A and __B (vpunpckldq): per lane, elements 0,1 of each source. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
                                          0+8,  16+8,  1+8,  17+8,
                                          0+12, 16+12, 1+12, 17+12);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}
4275 
/* Interleave the high (odd) 64-bit element of each 128-bit lane of __A and
 * __B (vpunpckhqdq). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}
4298 
/* Interleave the low (even) 64-bit element of each 128-bit lane of __A and
 * __B (vpunpcklqdq). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}
4321 
4322 
4323 /* SIMD load ops */
4324 
/* Unaligned 512-bit load.  The local packed, may_alias struct lets the
 * compiler emit an alignment-1 load without violating strict aliasing. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_si512 (void const *__P)
{
  struct __loadu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_si512*)__P)->__v;
}

/* Unaligned load of 16 32-bit integers; identical mechanism to
 * _mm512_loadu_si512 above. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi32 (void const *__P)
{
  struct __loadu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_epi32*)__P)->__v;
}

/* Masked unaligned load: elements whose bit in __U is clear are taken from
 * __W instead of being read from memory. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si) __W,
                  (__mmask16) __U);
}


/* Zero-masked unaligned load: elements whose mask bit is clear are zeroed. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U);
}
4360 
/* Unaligned load of 8 64-bit integers via a packed, may_alias wrapper. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi64 (void const *__P)
{
  struct __loadu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_epi64*)__P)->__v;
}

/* Masked unaligned load: unselected elements come from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* Zero-masked unaligned load: unselected elements are zeroed. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
                                                     (__v8di)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask8) __U);
}
4386 
/* Masked unaligned load of 16 floats: unselected elements come from __W. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked unaligned load of 16 floats. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}
4403 
/* Masked unaligned load of 8 doubles: unselected elements come from __W. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masked unaligned load of 8 doubles. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
4420 
/* Unaligned load of 8 doubles via a packed, may_alias wrapper struct. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_loadu_pd(void const *__p)
{
  struct __loadu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_pd*)__p)->__v;
}

/* Unaligned load of 16 floats via a packed, may_alias wrapper struct. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_loadu_ps(void const *__p)
{
  struct __loadu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_ps*)__p)->__v;
}
4438 
/* Aligned load of 16 floats: __p must be 64-byte aligned (plain dereference
 * of an __m512, which carries __aligned__(64)). */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_load_ps(void const *__p)
{
  return *(const __m512*)__p;
}

/* Masked aligned load: unselected elements come from __W. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked aligned load: unselected elements are zeroed. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}
4461 
/* Aligned load of 8 doubles: __p must be 64-byte aligned. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_load_pd(void const *__p)
{
  return *(const __m512d*)__p;
}

/* Masked aligned load: unselected elements come from __W. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                          (__v8df) __W,
                          (__mmask8) __U);
}

/* Zero-masked aligned load: unselected elements are zeroed. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
4484 
/* Aligned 512-bit integer load: __P must be 64-byte aligned. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_si512 (void const *__P)
{
  return *(const __m512i *) __P;
}

/* Aligned load of 16 32-bit integers (same operation as load_si512;
 * separate name mirrors the Intel intrinsic set). */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_epi32 (void const *__P)
{
  return *(const __m512i *) __P;
}

/* Aligned load of 8 64-bit integers (same operation as load_si512). */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_epi64 (void const *__P)
{
  return *(const __m512i *) __P;
}
4502 
4503 /* SIMD store ops */
4504 
/* Unaligned store of 8 64-bit integers; the packed, may_alias wrapper lets
 * the compiler emit an alignment-1 store without aliasing violations. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
  struct __storeu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}

/* Masked unaligned store: only elements whose bit in __U is set are written;
 * other memory locations are left untouched. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

/* Unaligned 512-bit store via a packed, may_alias wrapper. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  struct __storeu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si512*)__P)->__v = __A;
}
4529 
/* Unaligned store of 16 32-bit integers via a packed, may_alias wrapper. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
  struct __storeu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}

/* Masked unaligned store: only elements selected by __U are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                                     (__mmask16) __U);
}
4545 
/* Masked unaligned store of 8 doubles: only elements selected by __U are
 * written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unaligned store of 8 doubles via a packed, may_alias wrapper. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
  struct __storeu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd*)__P)->__v = __A;
}
4560 
/* Masked unaligned store of 16 floats: only elements selected by __U are
 * written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Unaligned store of 16 floats via a packed, may_alias wrapper. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
  struct __storeu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ps*)__P)->__v = __A;
}
4576 
/* Masked aligned store of 8 doubles: only elements selected by __U are
 * written; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned store of 8 doubles: __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Masked aligned store of 16 floats: only elements selected by __U are
 * written; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Aligned store of 16 floats: __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}
4601 
/* Aligned 512-bit integer store: __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Aligned store of 16 32-bit integers (same operation as store_si512). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Aligned store of 8 64-bit integers (same operation as store_si512). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4619 
4620 /* Mask ops */
4621 
/* Bitwise NOT of a 16-bit mask register (knotw). */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
  return __builtin_ia32_knothi(__M);
}
4627 
4628 /* Integer compare */
4629 
/* Named signed 32-bit compares: each wrapper expands to the generic
 * _mm512_cmp_epi32_mask with the matching _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
4654 
/* Named unsigned 32-bit compares: wrappers around the generic
 * _mm512_cmp_epu32_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
4679 
/* Named signed 64-bit compares: wrappers around the generic
 * _mm512_cmp_epi64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
4704 
/* Named unsigned 64-bit compares: wrappers around the generic
 * _mm512_cmp_epu64_mask with a fixed _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4729 
/* Sign-extend the 16 bytes of __A to 32-bit integers (vpmovsxbd). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi8_epi32(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}
4753 
/* Sign-extend the low 8 bytes of __A to 64-bit integers (vpmovsxbq); the
 * shufflevector extracts those 8 bytes before the widening conversion. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi8_epi64(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512 ());
}
4777 
/* Sign-extend 8 32-bit integers to 64-bit integers (vpmovsxdq). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}
4799 
/* Sign-extend 16 16-bit integers to 32-bit integers (vpmovsxwd). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepi16_epi32(__A),
                                            (__v16si)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepi16_epi32(__A),
                                            (__v16si)_mm512_setzero_si512 ());
}
4821 
/* Sign-extend 8 16-bit integers to 64-bit integers (vpmovsxwq). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
4843 
/* Zero-extend the 16 bytes of __A to 32-bit integers (vpmovzxbd); the
 * unsigned __v16qu element type forces a zero extension. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu8_epi32(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}
4865 
/* Zero-extend the low 8 bytes of __A to 64-bit integers (vpmovzxbq); the
 * shufflevector extracts those 8 bytes before the widening conversion. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu8_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
4887 
/* Zero-extend 8 32-bit integers to 64-bit integers (vpmovzxdq). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}
4909 
/* Zero-extend 16 16-bit integers to 32-bit integers (vpmovzxwd). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)_mm512_setzero_si512());
}
4931 
/* Zero-extend 8 16-bit integers to 64-bit integers (vpmovzxwq). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
4953 
/* Rotate each 32-bit element of __A right by the per-element count in __B
 * (vprorvd). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rorv_epi32(__A, __B),
                                           (__v16si)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rorv_epi32(__A, __B),
                                           (__v16si)_mm512_setzero_si512());
}
4975 
/* Rotate each 64-bit element of __A right by the per-element count in __B
 * (vprorvq). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
}

/* Merge-masking variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rorv_epi64(__A, __B),
                                            (__v8di)__W);
}

/* Zero-masking variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rorv_epi64(__A, __B),
                                            (__v8di)_mm512_setzero_si512());
}
4997 
4998 
4999 
/* Generic element-wise integer compares.  p selects the _MM_CMPINT_*
 * predicate; the result is a bitmask with one bit per element.  The
 * unmasked forms pass an all-ones mask; the _mask_ forms pass m so only
 * the elements selected by m contribute to the result.  These are macros
 * because p must be an immediate. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
5039 
/* Rotate-left by immediate count (VPROLD/VPROLQ).  Macros because (b) must
 * be an integer constant expression.  Mask/maskz variants blend the rotated
 * result with (W) or zero via the per-element select builtins. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
5065 
/* Rotate each 32-bit element of __A left by the count in the corresponding
 * element of __B (VPROLVD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
}

/* Merge-masking form: elements whose bit in __U is 0 take the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rolv_epi32(__A, __B),
                                           (__v16si)__W);
}

/* Zero-masking form: elements whose bit in __U is 0 are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rolv_epi32(__A, __B),
                                           (__v16si)_mm512_setzero_si512());
}

/* Rotate each 64-bit element of __A left by the count in the corresponding
 * element of __B (VPROLVQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
}

/* Merge-masking form: elements whose bit in __U is 0 take the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rolv_epi64(__A, __B),
                                            (__v8di)__W);
}

/* Zero-masking form: elements whose bit in __U is 0 are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rolv_epi64(__A, __B),
                                            (__v8di)_mm512_setzero_si512());
}
5109 
/* Rotate-right by immediate count (VPRORD/VPRORQ).  Macros because (B) must
 * be an integer constant expression.  Mask/maskz variants blend the rotated
 * result with (W) or zero via the per-element select builtins. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
5135 
/* Shift each 32-bit element of __A left by the immediate count __B
 * (VPSLLD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_slli_epi32(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
}

/* Merge-masking form: elements whose bit in __U is 0 take the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)__W);
}

/* Zero-masking form: elements whose bit in __U is 0 are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}

/* Shift each 64-bit element of __A left by the immediate count __B
 * (VPSLLQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_slli_epi64(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
}

/* Merge-masking form: elements whose bit in __U is 0 take the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)__W);
}

/* Zero-masking form: elements whose bit in __U is 0 are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
5179 
/* Shift each 32-bit element of __A right logically (zero-filling) by the
 * immediate count __B (VPSRLD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srli_epi32(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
}

/* Merge-masking form: elements whose bit in __U is 0 take the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)__W);
}

/* Zero-masking form: elements whose bit in __U is 0 are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}

/* Shift each 64-bit element of __A right logically (zero-filling) by the
 * immediate count __B (VPSRLQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srli_epi64(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
}

/* Merge-masking form: elements whose bit in __U is 0 take the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)__W);
}

/* Zero-masking form: elements whose bit in __U is 0 are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
5225 
/* Masked load of 16 x 32-bit elements from __P (VMOVDQA32); elements whose
 * bit in __U is 0 take the value from __W.  NOTE(review): the "movdqa"
 * builtin implies __P must be 64-byte aligned — see the Intel documentation
 * for _mm512_mask_load_epi32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si) __W,
              (__mmask16) __U);
}

/* Zero-masking variant of the load above: unselected elements become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}

/* Masked store: writes only the 32-bit elements of __A selected by __U to
 * __P (VMOVDQA32); other memory locations are left untouched. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
          (__mmask16) __U);
}
5249 
/* Per-element blend: result takes 32-bit elements from __A where the
 * corresponding bit of __U is 1, and from __W where it is 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) __W);
}

/* Zero-masking blend: unselected 32-bit elements become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) _mm512_setzero_si512 ());
}

/* Per-element blend: result takes 64-bit elements from __A where the
 * corresponding bit of __U is 1, and from __W where it is 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) __W);
}

/* Zero-masking blend: unselected 64-bit elements become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) _mm512_setzero_si512 ());
}
5281 
/* Masked load of 8 x 64-bit elements from __P (VMOVDQA64); elements whose
 * bit in __U is 0 take the value from __W.  NOTE(review): the "movdqa"
 * builtin implies __P must be 64-byte aligned — see the Intel documentation
 * for _mm512_mask_load_epi64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di) __W,
              (__mmask8) __U);
}

/* Zero-masking variant of the load above: unselected elements become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}

/* Masked store: writes only the 64-bit elements of __A selected by __U to
 * __P (VMOVDQA64); other memory locations are left untouched. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
          (__mmask8) __U);
}
5305 
/* Duplicate each even-indexed double of __A into the following odd lane
 * (shuffle indices 0,0,2,2,4,4,6,6 — VMOVDDUP). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_movedup_pd (__m512d __A)
{
  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
                                          0, 0, 2, 2, 4, 4, 6, 6);
}

/* Merge-masking form: elements whose bit in __U is 0 take the value from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masking form: elements whose bit in __U is 0 are set to zero. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
5328 
/* VFIXUPIMMPD: fix up special-value elements of (A)/(B) using the per-element
 * response table encoded in (C) and the immediate (imm); see the Intel
 * documentation for the table encoding.  _round_ variants take an explicit
 * SAE/rounding argument (R); the others use _MM_FROUND_CUR_DIRECTION.
 * maskz variants use the separate _maskz builtin so unselected elements are
 * zeroed rather than merged. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
5368 
/* VFIXUPIMMPS: single-precision counterparts of the fixupimm_pd macros
 * above — same table encoding in (C)/(imm), 16 elements, __mmask16. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
5408 
/* VFIXUPIMMSD: scalar (low double) fixupimm on 128-bit operands — same
 * table encoding in (C)/(imm) as the packed forms above. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
5447 
5448 #define _mm_fixupimm_round_ss(A, B, C, imm, R) \
5449   ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5450                                           (__v4sf)(__m128)(B), \
5451                                           (__v4si)(__m128i)(C), (int)(imm), \
5452                                           (__mmask8)-1, (int)(R)))
5453 
5454 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
5455   ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5456                                           (__v4sf)(__m128)(B), \
5457                                           (__v4si)(__m128i)(C), (int)(imm), \
5458                                           (__mmask8)(U), (int)(R)))
5459 
5460 #define _mm_fixupimm_ss(A, B, C, imm) \
5461   ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5462                                           (__v4sf)(__m128)(B), \
5463                                           (__v4si)(__m128i)(C), (int)(imm), \
5464                                           (__mmask8)-1, \
5465                                           _MM_FROUND_CUR_DIRECTION))
5466 
5467 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
5468   ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5469                                           (__v4sf)(__m128)(B), \
5470                                           (__v4si)(__m128i)(C), (int)(imm), \
5471                                           (__mmask8)(U), \
5472                                           _MM_FROUND_CUR_DIRECTION))
5473 
5474 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
5475   ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5476                                            (__v4sf)(__m128)(B), \
5477                                            (__v4si)(__m128i)(C), (int)(imm), \
5478                                            (__mmask8)(U), (int)(R)))
5479 
5480 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
5481   ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5482                                            (__v4sf)(__m128)(B), \
5483                                            (__v4si)(__m128i)(C), (int)(imm), \
5484                                            (__mmask8)(U), \
5485                                            _MM_FROUND_CUR_DIRECTION))
5486 
/* VGETEXPSD: compute the exponent of the low double (see the Intel docs for
 * exact semantics and the handling of the upper element).  The macro takes
 * an explicit SAE argument (R); the functions use _MM_FROUND_CUR_DIRECTION.
 * The third builtin argument is the merge source: zero vector for the
 * unmasked/maskz forms, __W for the merge-masked form. */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))


static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), (int)(R)))
5532 
/* VGETEXPSS: single-precision counterparts of the _mm_getexp_sd group
 * above — same merge-source / mask / SAE conventions. */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_getexp_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))
5577 
/* VGETMANTSD: extract the normalized mantissa of the low double.  The
 * builtin takes a single immediate: the normalization-interval selector (C)
 * occupies bits 1:0 and the sign control (D) the bits above it, hence the
 * ((D)<<2) | (C) packing.  Macros because both must be integer constant
 * expressions.  Merge source and mask follow the getexp conventions above. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D)  \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))
5622 
/* VGETMANTSS: single-precision counterparts of the _mm_getmant_sd group
 * above — same ((D)<<2) | (C) immediate packing and mask conventions. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5667 
/// Copies a 16-bit mask value; the body simply returns \a __A unchanged
/// (the mask-register move is left to register allocation).
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov (__mmask16 __A)
{
  return  __A;
}
5673 
/// Compares the low double elements of \a A and \a B using predicate \a P
/// with rounding/SAE control \a R and returns the comparison result as int.
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

/// Single-precision counterpart of _mm_comi_round_sd.
#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))

#ifdef __x86_64__
/// Converts the low double of \a A to a signed 64-bit integer using
/// rounding control \a R (64-bit targets only).
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5686 
/// Shifts each 32-bit element of \a __A left by the scalar count held in
/// \a __B (logical shift; VPSLLD form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sll_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}

/// Merge-masked form: result elements whose mask bit in \a __U is clear are
/// taken from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)__W);
}

/// Zero-masked form: result elements whose mask bit in \a __U is clear are
/// zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/// Shifts each 64-bit element of \a __A left by the scalar count held in
/// \a __B (logical shift; VPSLLQ form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sll_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
}

/// Merge-masked 64-bit shift left; unmasked elements come from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sll_epi64(__A, __B),
                                             (__v8di)__W);
}

/// Zero-masked 64-bit shift left; unmasked elements are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sll_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
5730 
/// Shifts each 32-bit element of \a __X left by the per-element count in the
/// corresponding element of \a __Y (variable logical shift; VPSLLVD form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sllv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
}

/// Merge-masked form: result elements whose mask bit in \a __U is clear are
/// taken from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/// Zero-masked form: result elements whose mask bit in \a __U is clear are
/// zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/// 64-bit-element counterpart of _mm512_sllv_epi32 (VPSLLVQ form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sllv_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
}

/// Merge-masked 64-bit variable shift left; unmasked elements come from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/// Zero-masked 64-bit variable shift left; unmasked elements are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
5774 
/// Shifts each 32-bit element of \a __A right by the scalar count held in
/// \a __B (arithmetic shift, sign-filling; VPSRAD form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sra_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
}

/// Merge-masked form: result elements whose mask bit in \a __U is clear are
/// taken from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)__W);
}

/// Zero-masked form: result elements whose mask bit in \a __U is clear are
/// zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/// 64-bit-element arithmetic right shift by scalar count (VPSRAQ form,
/// AVX-512 only).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sra_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
}

/// Merge-masked 64-bit arithmetic right shift; unmasked elements come from
/// \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)__W);
}

/// Zero-masked 64-bit arithmetic right shift; unmasked elements are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
5818 
/// Shifts each 32-bit element of \a __X right arithmetically by the
/// per-element count in \a __Y (VPSRAVD form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srav_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
}

/// Merge-masked form: result elements whose mask bit in \a __U is clear are
/// taken from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)__W);
}

/// Zero-masked form: result elements whose mask bit in \a __U is clear are
/// zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/// 64-bit-element counterpart of _mm512_srav_epi32 (VPSRAVQ form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srav_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
}

/// Merge-masked 64-bit variable arithmetic right shift; unmasked elements
/// come from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)__W);
}

/// Zero-masked 64-bit variable arithmetic right shift; unmasked elements are
/// zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
5862 
/// Shifts each 32-bit element of \a __A right by the scalar count held in
/// \a __B (logical shift, zero-filling; VPSRLD form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srl_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
}

/// Merge-masked form: result elements whose mask bit in \a __U is clear are
/// taken from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)__W);
}

/// Zero-masked form: result elements whose mask bit in \a __U is clear are
/// zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/// 64-bit-element logical right shift by scalar count (VPSRLQ form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srl_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
}

/// Merge-masked 64-bit logical right shift; unmasked elements come from
/// \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)__W);
}

/// Zero-masked 64-bit logical right shift; unmasked elements are zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
5906 
/// Shifts each 32-bit element of \a __X right logically by the per-element
/// count in \a __Y (VPSRLVD form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srlv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
}

/// Merge-masked form: result elements whose mask bit in \a __U is clear are
/// taken from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/// Zero-masked form: result elements whose mask bit in \a __U is clear are
/// zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/// 64-bit-element counterpart of _mm512_srlv_epi32 (VPSRLVQ form).
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
}

/// Merge-masked 64-bit variable logical right shift; unmasked elements come
/// from \a __W.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/// Zero-masked 64-bit variable logical right shift; unmasked elements are
/// zeroed.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
5950 
/// \enum _MM_TERNLOG_ENUM
///    A helper to represent the ternary logic operations among vector \a A,
///    \a B and \a C. The representation is passed to \a imm.
///    Each constant is the 8-bit truth table of one input considered alone;
///    combining them with C bitwise operators builds the imm for any
///    three-input boolean function (e.g. _MM_TERNLOG_A & ~_MM_TERNLOG_B).
typedef enum {
  _MM_TERNLOG_A = 0xF0, /* truth-table bits set where input A is 1 */
  _MM_TERNLOG_B = 0xCC, /* truth-table bits set where input B is 1 */
  _MM_TERNLOG_C = 0xAA  /* truth-table bits set where input C is 1 */
} _MM_TERNLOG_ENUM;
5959 
/// Computes, per 32-bit element, the three-input boolean function whose
/// 8-bit truth table is \a imm, applied to \a A, \a B and \a C (VPTERNLOGD).
#define _mm512_ternarylogic_epi32(A, B, C, imm)                                \
  ((__m512i)__builtin_ia32_pternlogd512_mask(                                  \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C),     \
      (unsigned char)(imm), (__mmask16)-1))

/// Merge-masked VPTERNLOGD: elements with a clear bit in \a U keep the
/// corresponding element of \a A.
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm)                        \
  ((__m512i)__builtin_ia32_pternlogd512_mask(                                  \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C),     \
      (unsigned char)(imm), (__mmask16)(U)))

/// Zero-masked VPTERNLOGD: elements with a clear bit in \a U are zeroed.
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm)                       \
  ((__m512i)__builtin_ia32_pternlogd512_maskz(                                 \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C),     \
      (unsigned char)(imm), (__mmask16)(U)))

/// 64-bit-element counterpart of _mm512_ternarylogic_epi32 (VPTERNLOGQ).
#define _mm512_ternarylogic_epi64(A, B, C, imm)                                \
  ((__m512i)__builtin_ia32_pternlogq512_mask(                                  \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),        \
      (unsigned char)(imm), (__mmask8)-1))

/// Merge-masked VPTERNLOGQ: elements with a clear bit in \a U keep the
/// corresponding element of \a A.
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm)                        \
  ((__m512i)__builtin_ia32_pternlogq512_mask(                                  \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),        \
      (unsigned char)(imm), (__mmask8)(U)))

/// Zero-masked VPTERNLOGQ: elements with a clear bit in \a U are zeroed.
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm)                       \
  ((__m512i)__builtin_ia32_pternlogq512_maskz(                                 \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),        \
      (unsigned char)(imm), (__mmask8)(U)))
5989 
#ifdef __x86_64__
/// Converts the low double of \a A to a signed 64-bit integer with rounding
/// control \a R; alias spelling of _mm_cvt_roundsd_si64 (64-bit only).
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif

/// Converts the low double of \a A to a signed 32-bit integer with rounding
/// control \a R.
#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

/// Alias spelling of _mm_cvt_roundsd_si32.
#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

/// Converts the low double of \a A to an unsigned 32-bit integer with
/// rounding control \a R (VCVTSD2USI).
#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))

/// Converts the low double of \a __A to an unsigned 32-bit integer using the
/// current rounding direction.
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvtsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/// Converts the low double of \a A to an unsigned 64-bit integer with
/// rounding control \a R (64-bit only).
#define _mm_cvt_roundsd_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R)))

/// Converts the low double of \a __A to an unsigned 64-bit integer using the
/// current rounding direction (64-bit only).
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvtsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
6024 
/// Converts the low float of \a A to a signed 32-bit integer with rounding
/// control \a R.
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

/// Alias spelling of _mm_cvt_roundss_si32.
#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#ifdef __x86_64__
/// Converts the low float of \a A to a signed 64-bit integer with rounding
/// control \a R (64-bit only).
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

/// Alias spelling of _mm_cvt_roundss_si64.
#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#endif

/// Converts the low float of \a A to an unsigned 32-bit integer with
/// rounding control \a R (VCVTSS2USI).
#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))

/// Converts the low float of \a __A to an unsigned 32-bit integer using the
/// current rounding direction.
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvtss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/// Converts the low float of \a A to an unsigned 64-bit integer with
/// rounding control \a R (64-bit only).
#define _mm_cvt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R)))

/// Converts the low float of \a __A to an unsigned 64-bit integer using the
/// current rounding direction (64-bit only).
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvtss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
6062 
/// Converts the low double of \a A to a signed 32-bit integer with
/// truncation; \a R controls exception suppression (VCVTTSD2SI).
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))

/// Alias spelling of _mm_cvtt_roundsd_i32.
#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))

/// Converts the low double of \a __A to a signed 32-bit integer with
/// truncation, using the current exception behavior.
static __inline__ int __DEFAULT_FN_ATTRS128
_mm_cvttsd_i32 (__m128d __A)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/// 64-bit truncating conversion of the low double of \a A (64-bit only).
#define _mm_cvtt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))

/// Alias spelling of _mm_cvtt_roundsd_si64.
#define _mm_cvtt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))

/// Converts the low double of \a __A to a signed 64-bit integer with
/// truncation (64-bit only).
static __inline__ long long __DEFAULT_FN_ATTRS128
_mm_cvttsd_i64 (__m128d __A)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif

/// Converts the low double of \a A to an unsigned 32-bit integer with
/// truncation (VCVTTSD2USI).
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))

/// Converts the low double of \a __A to an unsigned 32-bit integer with
/// truncation, using the current exception behavior.
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvttsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/// Unsigned 64-bit truncating conversion of the low double of \a A
/// (64-bit only).
#define _mm_cvtt_roundsd_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                    (int)(R)))

/// Converts the low double of \a __A to an unsigned 64-bit integer with
/// truncation (64-bit only).
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvttsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
6114 
/// Converts the low float of \a A to a signed 32-bit integer with
/// truncation; \a R controls exception suppression (VCVTTSS2SI).
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))

/// Alias spelling of _mm_cvtt_roundss_i32.
#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))

/// Converts the low float of \a __A to a signed 32-bit integer with
/// truncation, using the current exception behavior.
static __inline__ int __DEFAULT_FN_ATTRS128
_mm_cvttss_i32 (__m128 __A)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/// 64-bit truncating conversion of the low float of \a A (64-bit only).
#define _mm_cvtt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))

/// Alias spelling of _mm_cvtt_roundss_i64.
#define _mm_cvtt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))

/// Converts the low float of \a __A to a signed 64-bit integer with
/// truncation (64-bit only).
static __inline__ long long __DEFAULT_FN_ATTRS128
_mm_cvttss_i64 (__m128 __A)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif

/// Converts the low float of \a A to an unsigned 32-bit integer with
/// truncation (VCVTTSS2USI).
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))

/// Converts the low float of \a __A to an unsigned 32-bit integer with
/// truncation, using the current exception behavior.
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvttss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/// Unsigned 64-bit truncating conversion of the low float of \a A
/// (64-bit only).
#define _mm_cvtt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                    (int)(R)))

/// Converts the low float of \a __A to an unsigned 64-bit integer with
/// truncation (64-bit only).
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvttss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
6166 
/// Permutes double elements of \a X within each 128-bit lane according to
/// immediate control \a C (VPERMILPD, immediate form).
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

/// Merge-masked form: elements with a clear bit in \a U are taken from \a W.
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

/// Zero-masked form: elements with a clear bit in \a U are zeroed.
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

/// Permutes float elements of \a X within each 128-bit lane according to
/// immediate control \a C (VPERMILPS, immediate form).
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

/// Merge-masked form: elements with a clear bit in \a U are taken from \a W.
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

/// Zero-masked form: elements with a clear bit in \a U are zeroed.
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
6192 
/// Permutes double elements of \a __A within each 128-bit lane using the
/// per-element control vector \a __C (VPERMILPD, variable form).
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

/// Merge-masked form: elements with a clear bit in \a __U are taken from
/// \a __W.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)__W);
}

/// Zero-masked form: elements with a clear bit in \a __U are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)_mm512_setzero_pd());
}

/// Permutes float elements of \a __A within each 128-bit lane using the
/// per-element control vector \a __C (VPERMILPS, variable form).
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

/// Merge-masked form: elements with a clear bit in \a __U are taken from
/// \a __W.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)__W);
}

/// Zero-masked form: elements with a clear bit in \a __U are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)_mm512_setzero_ps());
}
6236 
/// Selects double elements from the concatenation of \a __A and \a __B using
/// the index vector \a __I (VPERMI2PD).
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
                                                 (__v8df)__B);
}

/// Merge-masked form: elements with a clear bit in \a __U keep the
/// corresponding element of \a __A.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                  (__v8df)__A);
}

/// Index-merging form: elements with a clear bit in \a __U keep the bits of
/// the index vector \a __I (bitcast to double lanes).
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
                             __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                  (__v8df)(__m512d)__I);
}

/// Zero-masked form: elements with a clear bit in \a __U are zeroed.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
                             __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                  (__v8df)_mm512_setzero_pd());
}
6269 
/// Selects float elements from the concatenation of \a __A and \a __B using
/// the index vector \a __I (VPERMI2PS).
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
                                                (__v16sf) __B);
}

/// Merge-masked form: elements with a clear bit in \a __U keep the
/// corresponding element of \a __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                 (__v16sf)__A);
}

/// Index-merging form: elements with a clear bit in \a __U keep the bits of
/// the index vector \a __I (bitcast to float lanes).
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                 (__v16sf)(__m512)__I);
}

/// Zero-masked form: elements with a clear bit in \a __U are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                 (__v16sf)_mm512_setzero_ps());
}
6300 
6301 
/// Converts eight doubles in \a A to unsigned 32-bit integers with
/// truncation; \a R controls exception suppression (VCVTTPD2UDQ).
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

/// Merge-masked form: elements with a clear bit in \a U are taken from \a W.
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

/// Zero-masked form: elements with a clear bit in \a U are zeroed.
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))

/// Converts eight doubles in \a __A to unsigned 32-bit integers with
/// truncation, using the current exception behavior.
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/// Merge-masked form of _mm512_cvttpd_epu32; unmasked elements come from
/// \a __W.
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/// Zero-masked form of _mm512_cvttpd_epu32; unmasked elements are zeroed.
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
6345 
6346 #define _mm_roundscale_round_sd(A, B, imm, R) \
6347   ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6348                                                  (__v2df)(__m128d)(B), \
6349                                                  (__v2df)_mm_setzero_pd(), \
6350                                                  (__mmask8)-1, (int)(imm), \
6351                                                  (int)(R)))
6352 
/* Round the low double of (B) to the precision selected by (imm)
   (vrndscalesd); the upper result element is copied from (A).  Current
   rounding direction; the all-ones mask makes the zero passthrough
   operand irrelevant. */
#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: if bit 0 of (U) is clear, the low element comes from (W). */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Merge-masked with explicit rounding/SAE control (R). */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

/* Zero-masked: if bit 0 of (U) is clear, the low element is zeroed. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Zero-masked with explicit rounding/SAE control (R). */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))
6387 
/* Round the low float of (B) to the precision selected by (imm)
   (vrndscaless); the upper three result elements are copied from (A).
   Explicit rounding/SAE control (R); unmasked. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

/* As above, but using the current rounding direction. */
#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: if bit 0 of (U) is clear, the low element comes from (W). */
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

/* Merge-masked with explicit rounding/SAE control (R). */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

/* Zero-masked: if bit 0 of (U) is clear, the low element is zeroed. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

/* Zero-masked with explicit rounding/SAE control (R). */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))
6429 
/* Scale packed doubles: A[i] * 2^floor(B[i]) (vscalefpd) with explicit
   rounding control (R); unmasked, so the undefined passthrough is unused. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: lanes with a clear bit in (U) are taken from (W). */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: lanes with a clear bit in (U) are zeroed. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
6447 
6448 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6449 _mm512_scalef_pd (__m512d __A, __m512d __B)
6450 {
6451   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6452                 (__v8df) __B,
6453                 (__v8df)
6454                 _mm512_undefined_pd (),
6455                 (__mmask8) -1,
6456                 _MM_FROUND_CUR_DIRECTION);
6457 }
6458 
6459 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6460 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6461 {
6462   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6463                 (__v8df) __B,
6464                 (__v8df) __W,
6465                 (__mmask8) __U,
6466                 _MM_FROUND_CUR_DIRECTION);
6467 }
6468 
6469 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6470 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6471 {
6472   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6473                 (__v8df) __B,
6474                 (__v8df)
6475                 _mm512_setzero_pd (),
6476                 (__mmask8) __U,
6477                 _MM_FROUND_CUR_DIRECTION);
6478 }
6479 
/* Scale packed floats: A[i] * 2^floor(B[i]) (vscalefps) with explicit
   rounding control (R); unmasked, so the undefined passthrough is unused. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

/* Merge-masked: lanes with a clear bit in (U) are taken from (W). */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

/* Zero-masked: lanes with a clear bit in (U) are zeroed. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
6497 
6498 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6499 _mm512_scalef_ps (__m512 __A, __m512 __B)
6500 {
6501   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6502                (__v16sf) __B,
6503                (__v16sf)
6504                _mm512_undefined_ps (),
6505                (__mmask16) -1,
6506                _MM_FROUND_CUR_DIRECTION);
6507 }
6508 
6509 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6510 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6511 {
6512   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6513                (__v16sf) __B,
6514                (__v16sf) __W,
6515                (__mmask16) __U,
6516                _MM_FROUND_CUR_DIRECTION);
6517 }
6518 
6519 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6520 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6521 {
6522   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6523                (__v16sf) __B,
6524                (__v16sf)
6525                _mm512_setzero_ps (),
6526                (__mmask16) __U,
6527                _MM_FROUND_CUR_DIRECTION);
6528 }
6529 
/* Scalar scalef: low element of result is A[0] * 2^floor(B[0])
   (vscalefsd); upper element is copied from (A).  Explicit rounding
   control (R); unmasked. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)))

/* Unmasked scalar scalef (double) with the current rounding direction. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_scalef_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
              (__mmask8) -1,
              _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: if bit 0 of __U is clear, the low element comes from __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked with explicit rounding control (R). */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

/* Zero-masked: if bit 0 of __U is clear, the low element is zeroed. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked with explicit rounding control (R). */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))

/* Scalar scalef (float): low element is A[0] * 2^floor(B[0]) (vscalefss);
   upper three elements are copied from (A).  Explicit rounding (R). */
#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)))

/* Unmasked scalar scalef (float) with the current rounding direction. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_scalef_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
             (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: if bit 0 of __U is clear, the low element comes from __W. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked with explicit rounding control (R). */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

/* Zero-masked: if bit 0 of __U is clear, the low element is zeroed. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked with explicit rounding control (R). */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              (int)(R)))
6624 
6625 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6626 _mm512_srai_epi32(__m512i __A, unsigned int __B)
6627 {
6628   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
6629 }
6630 
6631 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6632 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
6633                        unsigned int __B)
6634 {
6635   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6636                                          (__v16si)_mm512_srai_epi32(__A, __B),
6637                                          (__v16si)__W);
6638 }
6639 
6640 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6641 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A,
6642                         unsigned int __B) {
6643   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6644                                          (__v16si)_mm512_srai_epi32(__A, __B),
6645                                          (__v16si)_mm512_setzero_si512());
6646 }
6647 
6648 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6649 _mm512_srai_epi64(__m512i __A, unsigned int __B)
6650 {
6651   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
6652 }
6653 
6654 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6655 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
6656 {
6657   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6658                                           (__v8di)_mm512_srai_epi64(__A, __B),
6659                                           (__v8di)__W);
6660 }
6661 
6662 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6663 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
6664 {
6665   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6666                                           (__v8di)_mm512_srai_epi64(__A, __B),
6667                                           (__v8di)_mm512_setzero_si512());
6668 }
6669 
/* Select four 128-bit lanes: the two low result lanes come from (A) and
   the two high result lanes from (B), each chosen by a 2-bit field of
   (imm) (vshuff32x4). */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

/* Merge-masked lane shuffle: elements with a clear bit in (U) come from (W). */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

/* Zero-masked lane shuffle: elements with a clear bit in (U) are zeroed. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

/* 128-bit lane select for packed doubles (vshuff64x2); layout as f32x4. */
#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

/* Merge-masked: elements with a clear bit in (U) come from (W). */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked: elements with a clear bit in (U) are zeroed. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

/* 128-bit lane select for packed 32-bit ints (vshufi32x4). */
#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

/* Merge-masked: elements with a clear bit in (U) come from (W). */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

/* Zero-masked: elements with a clear bit in (U) are zeroed. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

/* 128-bit lane select for packed 64-bit ints (vshufi64x2). */
#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

/* Merge-masked: elements with a clear bit in (U) come from (W). */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

/* Zero-masked: elements with a clear bit in (U) are zeroed. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

/* Per-128-bit-lane double shuffle of (A)/(B) selected by bits of (M)
   (vshufpd). */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

/* Merge-masked vshufpd: elements with a clear bit in (U) come from (W). */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked vshufpd: elements with a clear bit in (U) are zeroed. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Per-128-bit-lane float shuffle of (A)/(B) selected by (M) (vshufps). */
#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

/* Merge-masked vshufps: elements with a clear bit in (U) come from (W). */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

/* Zero-masked vshufps: elements with a clear bit in (U) are zeroed. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
6753 
/* Square root of the low double of (B) (vsqrtsd); the upper result
   element is copied from (A).  Explicit rounding control (R); unmasked. */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of __U is clear, the low element comes from __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked with explicit rounding control (R). */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Zero-masked: if bit 0 of __U is clear, the low element is zeroed. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked with explicit rounding control (R). */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

/* Square root of the low float of (B) (vsqrtss); the upper three result
   elements are copied from (A).  Explicit rounding control (R). */
#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: if bit 0 of __U is clear, the low element comes from __W. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked with explicit rounding control (R). */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
                                            (int)(R)))

/* Zero-masked: if bit 0 of __U is clear, the low element is zeroed. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked with explicit rounding control (R). */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
6829 
6830 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6831 _mm512_broadcast_f32x4(__m128 __A)
6832 {
6833   return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6834                                          0, 1, 2, 3, 0, 1, 2, 3,
6835                                          0, 1, 2, 3, 0, 1, 2, 3);
6836 }
6837 
6838 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6839 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
6840 {
6841   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6842                                            (__v16sf)_mm512_broadcast_f32x4(__A),
6843                                            (__v16sf)__O);
6844 }
6845 
6846 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6847 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
6848 {
6849   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6850                                            (__v16sf)_mm512_broadcast_f32x4(__A),
6851                                            (__v16sf)_mm512_setzero_ps());
6852 }
6853 
6854 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6855 _mm512_broadcast_f64x4(__m256d __A)
6856 {
6857   return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6858                                           0, 1, 2, 3, 0, 1, 2, 3);
6859 }
6860 
6861 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6862 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6863 {
6864   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6865                                             (__v8df)_mm512_broadcast_f64x4(__A),
6866                                             (__v8df)__O);
6867 }
6868 
6869 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6870 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
6871 {
6872   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6873                                             (__v8df)_mm512_broadcast_f64x4(__A),
6874                                             (__v8df)_mm512_setzero_pd());
6875 }
6876 
6877 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6878 _mm512_broadcast_i32x4(__m128i __A)
6879 {
6880   return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6881                                           0, 1, 2, 3, 0, 1, 2, 3,
6882                                           0, 1, 2, 3, 0, 1, 2, 3);
6883 }
6884 
6885 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6886 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
6887 {
6888   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6889                                            (__v16si)_mm512_broadcast_i32x4(__A),
6890                                            (__v16si)__O);
6891 }
6892 
6893 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6894 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
6895 {
6896   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6897                                            (__v16si)_mm512_broadcast_i32x4(__A),
6898                                            (__v16si)_mm512_setzero_si512());
6899 }
6900 
6901 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6902 _mm512_broadcast_i64x4(__m256i __A)
6903 {
6904   return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6905                                           0, 1, 2, 3, 0, 1, 2, 3);
6906 }
6907 
6908 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6909 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6910 {
6911   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6912                                             (__v8di)_mm512_broadcast_i64x4(__A),
6913                                             (__v8di)__O);
6914 }
6915 
6916 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6917 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
6918 {
6919   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6920                                             (__v8di)_mm512_broadcast_i64x4(__A),
6921                                             (__v8di)_mm512_setzero_si512());
6922 }
6923 
/* Broadcast the low double of __A to all 8 lanes, merging: lanes with a
   clear bit in __M keep the value from __O. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) __O);
}

/* Broadcast the low double of __A to all 8 lanes; lanes with a clear bit
   in __M are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) _mm512_setzero_pd());
}

/* Broadcast the low float of __A to all 16 lanes, merging: lanes with a
   clear bit in __M keep the value from __O. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) __O);
}

/* Broadcast the low float of __A to all 16 lanes; lanes with a clear bit
   in __M are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) _mm512_setzero_ps());
}
6955 
6956 static __inline__ __m128i __DEFAULT_FN_ATTRS512
6957 _mm512_cvtsepi32_epi8 (__m512i __A)
6958 {
6959   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6960                (__v16qi) _mm_undefined_si128 (),
6961                (__mmask16) -1);
6962 }
6963 
6964 static __inline__ __m128i __DEFAULT_FN_ATTRS512
6965 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6966 {
6967   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6968                (__v16qi) __O, __M);
6969 }
6970 
6971 static __inline__ __m128i __DEFAULT_FN_ATTRS512
6972 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
6973 {
6974   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6975                (__v16qi) _mm_setzero_si128 (),
6976                __M);
6977 }
6978 
6979 static __inline__ void __DEFAULT_FN_ATTRS512
6980 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6981 {
6982   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6983 }
6984 
6985 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6986 _mm512_cvtsepi32_epi16 (__m512i __A)
6987 {
6988   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6989                (__v16hi) _mm256_undefined_si256 (),
6990                (__mmask16) -1);
6991 }
6992 
6993 static __inline__ __m256i __DEFAULT_FN_ATTRS512
6994 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6995 {
6996   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6997                (__v16hi) __O, __M);
6998 }
6999 
7000 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7001 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7002 {
7003   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7004                (__v16hi) _mm256_setzero_si256 (),
7005                __M);
7006 }
7007 
7008 static __inline__ void __DEFAULT_FN_ATTRS512
7009 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7010 {
7011   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7012 }
7013 
7014 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7015 _mm512_cvtsepi64_epi8 (__m512i __A)
7016 {
7017   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7018                (__v16qi) _mm_undefined_si128 (),
7019                (__mmask8) -1);
7020 }
7021 
7022 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7023 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7024 {
7025   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7026                (__v16qi) __O, __M);
7027 }
7028 
7029 static __inline__ __m128i __DEFAULT_FN_ATTRS512
7030 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7031 {
7032   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7033                (__v16qi) _mm_setzero_si128 (),
7034                __M);
7035 }
7036 
7037 static __inline__ void __DEFAULT_FN_ATTRS512
7038 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7039 {
7040   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7041 }
7042 
7043 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7044 _mm512_cvtsepi64_epi32 (__m512i __A)
7045 {
7046   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7047                (__v8si) _mm256_undefined_si256 (),
7048                (__mmask8) -1);
7049 }
7050 
7051 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7052 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7053 {
7054   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7055                (__v8si) __O, __M);
7056 }
7057 
7058 static __inline__ __m256i __DEFAULT_FN_ATTRS512
7059 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7060 {
7061   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7062                (__v8si) _mm256_setzero_si256 (),
7063                __M);
7064 }
7065 
7066 static __inline__ void __DEFAULT_FN_ATTRS512
7067 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7068 {
7069   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7070 }
7071 
/* VPMOVSQW: narrow the eight 64-bit elements of __A to 16 bits with signed
 * saturation.  Unmasked form; passthrough is unused (mask is all-ones). */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) __O, __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_setzero_si128 (),
               __M);
}

/* Masked store: write only the __M-selected 16-bit results to unaligned
 * memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
7100 
/* VPMOVUSDB: narrow the sixteen 32-bit elements of __A to 8 bits with
 * unsigned saturation (values above 255 clamp to 255).
 * Unmasked form; passthrough is unused (mask is all-ones). */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask16) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) __O,
                __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

/* Masked store: write only the __M-selected bytes to unaligned memory at
 * __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7130 
/* VPMOVUSDW: narrow the sixteen 32-bit elements of __A to 16 bits with
 * unsigned saturation.  Unmasked form; passthrough unused (mask all-ones). */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_undefined_si256 (),
                (__mmask16) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) __O,
                __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_setzero_si256 (),
                __M);
}

/* Masked store: write only the __M-selected 16-bit results to unaligned
 * memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}
7160 
/* VPMOVUSQB: narrow the eight 64-bit elements of __A to 8 bits with unsigned
 * saturation.  The eight converted bytes occupy the low half of the 128-bit
 * result.  Unmasked form; passthrough unused (mask all-ones). */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) __O,
                __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

/* Masked store: write only the __M-selected bytes to unaligned memory at
 * __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7190 
/* VPMOVUSQD: narrow the eight 64-bit elements of __A to 32 bits with
 * unsigned saturation.  Unmasked form; passthrough unused (mask all-ones). */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_undefined_si256 (),
                (__mmask8) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) __O, __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_setzero_si256 (),
                __M);
}

/* Masked store: write only the __M-selected 32-bit results to unaligned
 * memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
}
7219 
/* VPMOVUSQW: narrow the eight 64-bit elements of __A to 16 bits with
 * unsigned saturation.  Unmasked form; passthrough unused (mask all-ones). */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) __O, __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_setzero_si128 (),
                __M);
}

/* Masked store: write only the __M-selected 16-bit results to unaligned
 * memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
}
7248 
/* VPMOVDB: truncate the sixteen 32-bit elements of __A to their low 8 bits
 * (plain truncation — no saturation; contrast the cvtsepi32/cvtusepi32
 * variants).  Unmasked form; passthrough unused (mask all-ones). */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask16) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) __O, __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

/* Masked store: write only the __M-selected bytes to unaligned memory at
 * __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7277 
/* VPMOVDW: truncate the sixteen 32-bit elements of __A to their low 16 bits
 * (no saturation).  Unmasked form; passthrough unused (mask all-ones). */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_undefined_si256 (),
              (__mmask16) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) __O, __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_setzero_si256 (),
              __M);
}

/* Masked store: write only the __M-selected 16-bit results to unaligned
 * memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
}
7306 
/* VPMOVQB: truncate the eight 64-bit elements of __A to their low 8 bits (no
 * saturation).  The eight bytes occupy the low half of the 128-bit result.
 * Unmasked form; passthrough unused (mask all-ones). */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) __O, __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

/* Masked store: write only the __M-selected bytes to unaligned memory at
 * __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7335 
/* VPMOVQD: truncate the eight 64-bit elements of __A to their low 32 bits
 * (no saturation).  Unmasked form; passthrough unused (mask all-ones). */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_undefined_si256 (),
              (__mmask8) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) __O, __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_setzero_si256 (),
              __M);
}

/* Masked store: write only the __M-selected 32-bit results to unaligned
 * memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
7364 
/* VPMOVQW: truncate the eight 64-bit elements of __A to their low 16 bits
 * (no saturation).  Unmasked form; passthrough unused (mask all-ones). */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

/* Merge-masked: result elements whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) __O, __M);
}

/* Zero-masked: result elements whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_setzero_si128 (),
              __M);
}

/* Masked store: write only the __M-selected 16-bit results to unaligned
 * memory at __P. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
7393 
/* Extract one 128-bit lane of four 32-bit elements from A; imm selects the
 * lane.  These must be macros (not functions) because imm feeds the
 * instruction's immediate operand and must be a compile-time constant. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_undefined_si128(), \
                                             (__mmask8)-1))

/* Merge-masked extract: elements with a clear bit in U are taken from W. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

/* Zero-masked extract: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

/* Extract one 256-bit half of four 64-bit elements from A; imm selects the
 * half. */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_undefined_si256(), \
                                             (__mmask8)-1))

/* Merge-masked extract: elements with a clear bit in U are taken from W. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

/* Zero-masked extract: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
7423 
/* Insert the 256-bit vector B into A at the half selected by imm; imm must
 * be a compile-time constant.  The masked forms are built by applying an
 * element select over the result of the unmasked insert. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

/* Merge-masked insert: elements with a clear bit in U are taken from W. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked insert: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Integer analogue of insertf64x4: insert 256-bit B at half imm of A. */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

/* Merge-masked insert: elements with a clear bit in U are taken from W. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                   (__v8di)(__m512i)(W)))

/* Zero-masked insert: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                   (__v8di)_mm512_setzero_si512()))

/* Insert the 128-bit vector B into A at the lane selected by imm (0-3). */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

/* Merge-masked insert: elements with a clear bit in U are taken from W. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked insert: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)_mm512_setzero_ps()))

/* Integer analogue of insertf32x4: insert 128-bit B at lane imm of A. */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

/* Merge-masked insert: elements with a clear bit in U are taken from W. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)(__m512i)(W)))

/* Zero-masked insert: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)_mm512_setzero_si512()))
7479 
/* VGETMANTPD/VGETMANTPS: extract the normalized mantissa of each element.
 * The builtin packs the two immediate controls as (C << 2) | B, where C is
 * the interval-normalization control and B the sign control; both must be
 * compile-time constants, which is why these are macros.  The _round forms
 * additionally take a rounding/SAE control R. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

/* Merge-masked: elements with a clear bit in U are taken from W. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Zero-masked: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

/* Non-rounding forms use the current rounding direction.  NOTE(review): the
 * unmasked pd form passes a zero vector as passthrough where the ps form
 * passes an undefined one; the value is irrelevant since the mask is
 * all-ones, so both behave identically. */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: elements with a clear bit in U are taken from W. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

/* Zero-masked: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

/* Single-precision variants; same (C<<2)|B immediate packing. */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

/* Merge-masked: elements with a clear bit in U are taken from W. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

/* Zero-masked: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

/* Non-rounding single-precision forms (current rounding direction). */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

/* Merge-masked: elements with a clear bit in U are taken from W. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

/* Zero-masked: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7557 
/* VGETEXPPD with explicit rounding/SAE control R: extract each element's
 * exponent as a double.  Macro because R must be a compile-time constant. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked: elements with a clear bit in U are taken from W. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7572 
/* VGETEXPPD, current rounding direction: extract the exponent of each
 * double element as a double.  Unmasked; passthrough unused (mask
 * all-ones). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: elements with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
7599 
/* VGETEXPPS with explicit rounding/SAE control R: extract each element's
 * exponent as a float.  Macro because R must be a compile-time constant. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

/* Merge-masked: elements with a clear bit in U are taken from W. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

/* Zero-masked: elements with a clear bit in U are zeroed. */
#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7614 
/* VGETEXPPS, current rounding direction: extract the exponent of each float
 * element as a float.  Unmasked; passthrough unused (mask all-ones). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: elements with a clear bit in __U are taken from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: elements with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}
7641 
/* Gather intrinsics.  Each element i is loaded from
 * addr + index[i] * scale; scale must be a compile-time constant (hence
 * macros).  Masked forms load only elements whose mask bit is set; the rest
 * come from v1_old.  The 64-bit-index gathers of 32-bit data (i64gather_ps /
 * i64gather_epi32) yield 256-bit results: eight indices, eight elements. */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* 64-bit indices gathering 64-bit data: full 512-bit results. */
#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* 32-bit indices gathering 32-bit data: sixteen elements. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

/* 32-bit indices (a 256-bit index vector) gathering 64-bit data. */
#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7737 
/* Scatter intrinsics.  Each element i of v1 is stored to
 * addr + index[i] * scale; scale must be a compile-time constant (hence
 * macros).  Masked forms store only elements whose mask bit is set.
 * The 64-bit-index scatters of 32-bit data take 256-bit source vectors
 * (eight indices, eight elements). */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

/* 64-bit indices scattering 64-bit data: full 512-bit sources. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

/* 32-bit indices scattering 32-bit data: sixteen elements. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

/* 32-bit indices (a 256-bit index vector) scattering 64-bit data. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))
7802 
7803 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
7804   __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
7805                                (__v8si)(__m256i)(index), \
7806                                (__v8df)(__m512d)(v1), (int)(scale))
7807 
7808 #define _mm512_i32scatter_epi64(addr, index, v1, scale) \
7809   __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
7810                                (__v8si)(__m256i)(index), \
7811                                (__v8di)(__m512i)(v1), (int)(scale))
7812 
7813 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
7814   __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
7815                                (__v8si)(__m256i)(index), \
7816                                (__v8di)(__m512i)(v1), (int)(scale))
7817 
/* Scalar single-precision fused multiply-add, element 0 only, with
 * AVX-512 masking.  Per the AVX-512 convention: _mask merges a
 * masked-off element 0 from the first operand, _maskz zeroes it, and
 * _mask3 merges it from the third operand.  The non-_round_ forms use
 * _MM_FROUND_CUR_DIRECTION (round per the current MXCSR mode); the
 * _round_ variants take an explicit rounding immediate R and therefore
 * must be macros. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
                                       (__v4sf)__A,
                                       (__v4sf)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked rounding form: (__mmask8)-1 enables the element. */
#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))

/* Zero-masking: element 0 becomes 0 when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
                                        (__v4sf)__B,
                                        (__v4sf)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))

/* mask3: a masked-off element 0 is taken from __Y (the addend). */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
                                        (__v4sf)__X,
                                        (__v4sf)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7871 
/* Scalar single-precision fused multiply-subtract (a*b - c).  The
 * subtraction is expressed by negating the addend operand before the
 * fmadd builtin.  The mask3 forms cannot negate their third operand —
 * it must survive unmodified as the merge source — so they use the
 * dedicated vfmsub mask3 builtin instead. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
                                       (__v4sf)__A,
                                       -(__v4sf)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
                                        (__v4sf)__B,
                                        -(__v4sf)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))

/* Dedicated fmsub builtin: __Y is both subtrahend and merge source. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
                                        (__v4sf)__X,
                                        (__v4sf)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7925 
/* Scalar single-precision negated fused multiply-add (-(a*b) + c),
 * expressed by negating one multiplicand before the fmadd builtin. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
                                       -(__v4sf)__A,
                                       (__v4sf)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
                                        -(__v4sf)__B,
                                        (__v4sf)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
                                        -(__v4sf)__X,
                                        (__v4sf)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
7979 
/* Scalar single-precision negated fused multiply-subtract
 * (-(a*b) - c): both a multiplicand and the addend are negated before
 * the fmadd builtin.  The mask3 forms must keep __Y intact as the
 * merge source, so they negate only the multiplicand and switch to the
 * dedicated vfmsub mask3 builtin. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
                                       -(__v4sf)__A,
                                       -(__v4sf)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
                                        -(__v4sf)__B,
                                        -(__v4sf)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
                                        -(__v4sf)__X,
                                        (__v4sf)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
8033 
/* Scalar double-precision fused multiply-add, element 0 only —
 * double-precision analogues of the _ss fmadd family: _mask merges a
 * masked-off element 0 from the first operand, _maskz zeroes it,
 * _mask3 merges from the third operand; _round_ forms take an explicit
 * rounding immediate R (hence macros). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
                                       (__v2df)__A,
                                       (__v2df)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
                                        (__v2df)__B,
                                        (__v2df)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
                                        (__v2df)__X,
                                        (__v2df)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
8087 
/* Scalar double-precision fused multiply-subtract (a*b - c), element 0
 * only.  As with the _ss forms, subtraction is done by negating the
 * addend before the fmadd builtin; the mask3 forms preserve __Y for
 * merging and use the dedicated vfmsub mask3 builtin. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
                                       (__v2df)__A,
                                       -(__v2df)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
                                        (__v2df)__B,
                                        -(__v2df)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
                                        (__v2df)__X,
                                        (__v2df)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
8141 
/* Scalar double-precision negated fused multiply-add (-(a*b) + c),
 * expressed by negating one multiplicand before the fmadd builtin. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
                                       -(__v2df)__A,
                                       (__v2df)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
                                        -(__v2df)__B,
                                        (__v2df)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
                                        -(__v2df)__X,
                                        (__v2df)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))
8195 
/* Scalar double-precision negated fused multiply-subtract
 * (-(a*b) - c): a multiplicand and the addend are both negated before
 * the fmadd builtin.  The mask3 forms must keep __Y intact as the
 * merge source, so they negate only the multiplicand and use the
 * dedicated vfmsub mask3 builtin. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
                                       -(__v2df)__A,
                                       -(__v2df)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
                                        -(__v2df)__B,
                                        -(__v2df)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), \
                                           (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
                                        -(__v2df)__X,
                                        (__v2df)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
8250 
/* Permute 64-bit elements within each 256-bit lane by the immediate
 * control C (hence macros).  The masked forms are built by selecting
 * between the permuted result and either the passthrough vector W
 * (_mask_) or zero (_maskz_) via the element-select builtins. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
8276 
/* Full-width variable permute of doubles: element i of the result is
 * __Y[__X[i]].  Note the builtin takes (data, index) while the
 * intrinsic takes (index, data), so the arguments are swapped here. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
}

/* Merge-masking: lanes with a clear mask bit come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                        (__v8df)__W);
}

/* Zero-masking: lanes with a clear mask bit become 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                        (__v8df)_mm512_setzero_pd());
}
8298 
/* Full-width variable permute of qwords: result[i] = __Y[__X[i]].
 * Builtin operand order is (data, index), the reverse of the intrinsic. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
}

/* Zero-masking variant: lanes with a clear bit in __M become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                     (__v8di)_mm512_setzero_si512());
}

/* Merge-masking variant: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                     (__v8di)__W);
}
8321 
/* Full-width variable permute of floats: result[i] = __Y[__X[i]].
 * Builtin operand order is (data, index), the reverse of the intrinsic. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
}

/* Merge-masking: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                       (__v16sf)__W);
}

/* Zero-masking: lanes with a clear bit in __U become 0.0f. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                       (__v16sf)_mm512_setzero_ps());
}
8343 
/* Full-width variable permute of dwords: result[i] = __Y[__X[i]].
 * Builtin operand order is (data, index), the reverse of the intrinsic. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
}

/* Legacy spelling kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32

/* Zero-masking variant: lanes with a clear bit in __M become zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
                                    (__v16si)_mm512_setzero_si512());
}

/* Merge-masking variant: lanes with a clear bit in __M come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
                                    (__v16si)__W);
}

/* Legacy spelling kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8370 
/* 16-bit opmask-register logic.  Routing through the k* builtins keeps
 * the values in mask registers rather than GPRs. */

/* Bitwise AND of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

/* AND-NOT: per KANDNW, the first operand is complemented (~A & B). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise OR of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

/* KORTEST carry test: nonzero when (__A | __B) is all ones. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}

/* KORTEST zero test: nonzero when (__A | __B) is all zeroes. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}
8400 
/* unsigned-char spellings of the KORTEST predicates (newer intrinsic
 * naming); same underlying builtins as _mm512_kortestc/_mm512_kortestz. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}
8412 
8413 static __inline__ unsigned char __DEFAULT_FN_ATTRS
8414 _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8415   *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8416   return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8417 }
8418 
/* KUNPCKBW: concatenate the low bytes of the two masks into one
 * 16-bit mask. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise XNOR (complement of XOR) of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise XOR of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}
8436 
/* Intel-documented _k*_mask16 spellings are plain aliases of the
   _mm512_k* operations above.  */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

/* Shift a 16-bit mask left/right by the immediate I (KSHIFTLW/KSHIFTRW).
   Macros (not functions) because I must be a compile-time constant.  */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))

#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))

/* Copy a 16-bit mask to/from a 32-bit integer (KMOVW).  */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask16_u32(__mmask16 __A) {
  return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_cvtu32_mask16(unsigned int __A) {
  return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
}

/* Load/store a 16-bit mask from/to memory through KMOVW.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_load_mask16(__mmask16 *__A) {
  return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
}

static __inline__ void __DEFAULT_FN_ATTRS
_store_mask16(__mmask16 *__A, __mmask16 __B) {
  *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
}
8469 
/* Non-temporal (cache-bypassing) 64-byte store of __A to *__P.  The local
   typedef re-attaches the 64-byte alignment that the void* parameter lost,
   which the nontemporal builtin requires; __P must be 64-byte aligned.  */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_si512 (void * __P, __m512i __A)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
}

/* Non-temporal 64-byte load from *__P (VMOVNTDQA); __P must be aligned.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_stream_load_si512 (void const *__P)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
}

/* Non-temporal store of 8 doubles (VMOVNTPD); __P must be aligned.  */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_pd (void *__P, __m512d __A)
{
  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
}

/* Non-temporal store of 16 floats (VMOVNTPS); __P must be aligned.  */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_ps (void *__P, __m512 __A)
{
  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
}
8497 
/* Compress family (VCOMPRESSPD/PS, VPCOMPRESSQ/D): pack the elements of __A
   selected by mask __U contiguously into the low positions of the result.
   _mask_ variants take remaining elements from __W; _maskz_ variants zero
   them.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di) __W,
                  (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di)
                  _mm512_setzero_si512 (),
                  (__mmask8) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si) __W,
                  (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U);
}
8565 
/* Scalar compare-to-mask (VCMPSS/VCMPSD): compare the low elements of X and
   Y with predicate P, returning the result in bit 0 of a __mmask8.
   _round_ variants take an explicit rounding/SAE control R; the others use
   _MM_FROUND_CUR_DIRECTION.  _mask_ variants AND the result with bit 0 of M.
   Macros because P (and R) must be immediates.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8609 
/* Bit Test */

/* VPTESTM / VPTESTNM emulated via AND + compare-against-zero:
   test  -> mask bit set where (__A & __B) != 0 per element;
   testn -> mask bit set where (__A & __B) == 0 per element.
   The epi64 variants reuse _mm512_and_epi32 deliberately: bitwise AND is
   element-width agnostic, only the compare width matters.  */
static __inline __mmask16 __DEFAULT_FN_ATTRS512
_mm512_test_epi32_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
                                   _mm512_setzero_si512());
}

/* Masked form: result is additionally ANDed with __U.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
                                        _mm512_setzero_si512());
}

static __inline __mmask8 __DEFAULT_FN_ATTRS512
_mm512_test_epi64_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
                                   _mm512_setzero_si512());
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
                                        _mm512_setzero_si512());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
                                  _mm512_setzero_si512());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
                                       _mm512_setzero_si512());
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
                                  _mm512_setzero_si512());
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
                                       _mm512_setzero_si512());
}
8667 
/* VMOVSHDUP: duplicate each odd-indexed float into the even slot below it
   (result lanes are 1,1,3,3,...,15,15).  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_movehdup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
}

/* Masked variants select per element between the shuffle result and
   __W (mask) or zero (maskz) under __U.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

/* VMOVSLDUP: duplicate each even-indexed float into the odd slot above it
   (result lanes are 0,0,2,2,...,14,14).  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_moveldup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
8713 
/* Masked scalar move (VMOVSS/VMOVSD with mask): element 0 is __B[0] when
   bit 0 of __U is set, otherwise __W[0] (mask) or 0 (maskz); elements above
   0 come from __A via _mm_move_ss/_mm_move_sd.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
                                     _mm_setzero_ps());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
                                     _mm_setzero_pd());
}
8739 
/* Masked scalar store: writes __A[0] to *__W only when bit 0 of __U is set
   (only bit 0 participates, hence the `__U & 1`).  */
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
  __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
}

static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
}

/* Masked scalar load: element 0 is *__A when bit 0 of __U is set, else
   __W[0]; elements above 0 are always zeroed, so the pass-through source is
   pre-built as { __W[0], 0, 0, 0 } via the shuffle (indices >= 4 pick from
   the zero vector).  */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
{
  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
                                                (__v4sf)_mm_setzero_ps(),
                                                0, 4, 4, 4);

  return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_load_ss (__mmask8 __U, const float* __A)
{
  return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
                                                (__v4sf) _mm_setzero_ps(),
                                                __U & 1);
}

/* Same scheme for double: pass-through is { __W[0], 0 }.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
{
  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
                                                 (__v2df)_mm_setzero_pd(),
                                                 0, 2);

  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_load_sd (__mmask8 __U, const double* __A)
{
  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
                                                  (__v2df) _mm_setzero_pd(),
                                                  __U & 1);
}
8787 
/* VPSHUFD: permute 32-bit elements within each 128-bit lane by immediate I;
   masked forms blend with W or zero under U.  Macros because I must be a
   compile-time constant.  */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8800 
/* Expand family (VEXPANDPD, VPEXPANDQ): the inverse of compress — scatter
   the low elements of __A into the positions selected by __U; unselected
   positions come from __W (mask) or are zeroed (maskz).  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
                (__v8di) __W,
                (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
                (__v8di) _mm512_setzero_si512 (),
                (__mmask8) __U);
}
8832 
/* Expand-load family: like expand, but the packed source elements are read
   from unaligned memory at __P instead of a register; only as many elements
   as __U has set bits are loaded.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) __W,
              (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) _mm512_setzero_pd(),
              (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
              (__v8di) __W,
              (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
              (__v8di) _mm512_setzero_si512(),
              (__mmask8) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) _mm512_setzero_ps(),
                   (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
              (__v16si) __W,
              (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
              (__v16si) _mm512_setzero_si512(),
              (__mmask16) __U);
}
8896 
/* Register-to-register expand for 32-bit elements (VEXPANDPS/VPEXPANDD);
   see the expand family comment above.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps(),
               (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
                (__v16si) __W,
                (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
                (__v16si) _mm512_setzero_si512(),
                (__mmask16) __U);
}
8928 
/* VCVTPS2PD: widen 8 floats to 8 doubles.  The _round_ macros take an
   explicit SAE control R (exceptions suppressed; rounding is exact for
   this widening conversion).  */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

/* Unmasked form uses the generic vector-conversion builtin so the compiler
   can reason about it directly.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtps_pd (__m256 __A)
{
  return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtps_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtps_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* "pslo" variants convert only the low 8 floats of a 512-bit source.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtpslo_pd (__m512 __A)
{
  return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
{
  return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
}
8977 
/* Masked register move (VMOVAPD/VMOVAPS with mask): per element, take __A
   where __U is set, otherwise __W (mask) or zero (maskz).  */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) __W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) _mm512_setzero_pd ());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) __W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) _mm512_setzero_ps ());
}
9009 
/* Compress-store: pack the elements of __A selected by __U and write them
   contiguously to unaligned memory at __P; only popcount(__U) elements are
   written.  */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
            (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
            (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
            (__mmask16) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
            (__mmask16) __U);
}
9037 
/* VCVTSD2SS: convert the low double of B to a float in element 0; upper
   elements come from A.  Masked forms take element 0 from W/zero when bit 0
   of U is clear.  The _round_ macros accept an explicit rounding mode R.  */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
                                             (__v2df)__B,
                                             (__v4sf)__W,
                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
                                             (__v2df)__B,
                                             (__v4sf)_mm_setzero_ps(),
                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}
9073 
/* Intel "_i32/_i64" spellings are aliases of the "_si32/_si64" scalar
   conversion intrinsics.  */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

/* VCVTSI2SD/VCVTSI2SS with explicit rounding mode R: convert integer B and
   insert into element 0 of A.  The _i and _si spellings are equivalent.
   64-bit-operand forms exist only on x86-64.  */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif

/* VCVTSS2SD with explicit SAE control R: widen the low float of B into the
   low double of the result; upper element from A, masked forms from W/zero.  */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))
9128 
/* Function forms of the masked VCVTSS2SD conversion using the current
   rounding direction (widening is exact; SAE not needed here).  */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
                                            (__v4sf)__B,
                                            (__v2df)__W,
                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
                                            (__v4sf)__B,
                                            (__v2df)_mm_setzero_pd(),
                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}
9146 
9147 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9148 _mm_cvtu32_sd (__m128d __A, unsigned __B)
9149 {
9150   __A[0] = __B;
9151   return __A;
9152 }
9153 
#ifdef __x86_64__
/* VCVTUSI2SD (64-bit operand) with explicit rounding mode R.  */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R)))

/* Convert the unsigned 64-bit integer __B to double precision and insert
   it as element 0; element 1 is carried through from __A.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  __A[0] = __B;
  return __A;
}
#endif
9166 
/* VCVTUSI2SS (32-bit operand) with explicit rounding mode R.  */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))
9170 
9171 static __inline__ __m128 __DEFAULT_FN_ATTRS128
9172 _mm_cvtu32_ss (__m128 __A, unsigned __B)
9173 {
9174   __A[0] = __B;
9175   return __A;
9176 }
9177 
#ifdef __x86_64__
/* VCVTUSI2SS (64-bit operand) with explicit rounding mode R.  */
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))

/* Convert the unsigned 64-bit integer __B to single precision and insert
   it as element 0; elements 1-3 are carried through from __A.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  __A[0] = __B;
  return __A;
}
#endif
9190 
/* Masked broadcast: elements selected by __M are set to __A, the rest are
   taken from __O.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_set1_epi32(__A),
                                              (__v16si) __O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_set1_epi64(__A),
                                              (__v8di) __O);
}
9206 
/* Build a 512-bit vector from 64 bytes.  Arguments are given highest
   element first (Intel convention) while the initializer lists elements
   lowest first, hence the reversed order in the braces.  */
static  __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
    char __e4, char __e3, char __e2, char __e1, char __e0) {

  return __extension__ (__m512i)(__v64qi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
}
9230 
/// Constructs a 512-bit vector of [32 x i16]. Arguments are given from the
/// highest element (\a __e31) down to the lowest (\a __e0); the initializer
/// lists them in reverse so that \a __e0 lands in the lowest lane.
static  __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
    short __e27, short __e26, short __e25, short __e24, short __e23,
    short __e22, short __e21, short __e20, short __e19, short __e18,
    short __e17, short __e16, short __e15, short __e14, short __e13,
    short __e12, short __e11, short __e10, short __e9, short __e8,
    short __e7, short __e6, short __e5, short __e4, short __e3,
    short __e2, short __e1, short __e0) {
  return __extension__ (__m512i)(__v32hi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
}
9245 
/// Constructs a 512-bit vector of [16 x i32] from 16 integers; \a __A becomes
/// the highest lane and \a __P the lowest.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
     int __E, int __F, int __G, int __H,
     int __I, int __J, int __K, int __L,
     int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}
9256 
/// Reversed-argument variant of _mm512_set_epi32: \a e0 becomes the lowest
/// lane and \a e15 the highest.
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
       e8,e9,e10,e11,e12,e13,e14,e15)          \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
9261 
/// Constructs a 512-bit vector of [8 x i64] from 8 integers; \a __A becomes
/// the highest lane and \a __H the lowest.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi64 (long long __A, long long __B, long long __C,
     long long __D, long long __E, long long __F,
     long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
9270 
/// Reversed-argument variant of _mm512_set_epi64: \a e0 becomes the lowest
/// lane and \a e7 the highest.
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9273 
/// Constructs a 512-bit vector of [8 x double] from 8 values; \a __A becomes
/// the highest lane and \a __H the lowest.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_set_pd (double __A, double __B, double __C, double __D,
        double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
9281 
/// Reversed-argument variant of _mm512_set_pd: \a e0 becomes the lowest lane
/// and \a e7 the highest.
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9284 
/// Constructs a 512-bit vector of [16 x float] from 16 values; \a __A becomes
/// the highest lane and \a __P the lowest.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_set_ps (float __A, float __B, float __C, float __D,
        float __E, float __F, float __G, float __H,
        float __I, float __J, float __K, float __L,
        float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}
9295 
/// Reversed-argument variant of _mm512_set_ps: \a e0 becomes the lowest lane
/// and \a e15 the highest.
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9299 
/// Computes the absolute value of each single-precision lane of \a __A by
/// clearing the sign bit (bitwise AND with 0x7FFFFFFF).
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_abs_ps(__m512 __A)
{
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
9305 
/// Masked absolute value of each single-precision lane of \a __A (sign bit
/// cleared via AND); lanes not selected by \a __K keep the value from \a __W.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
{
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
9311 
/// Computes the absolute value of each double-precision lane of \a __A by
/// clearing the sign bit (bitwise AND with 0x7FFFFFFFFFFFFFFF).
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_abs_pd(__m512d __A)
{
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}
9317 
/// Masked absolute value of each double-precision lane of \a __A (sign bit
/// cleared via AND); lanes not selected by \a __K keep the value from \a __W.
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
{
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
9323 
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
 * outputs. This class of vector operation forms the basis of many scientific
 * computations. In vector-reduction arithmetic, the evaluation order is
 * independent of the order of the input elements of V.
 *
 * For floating-point intrinsics:
 * 1. When using fadd/fmul intrinsics, the order of operations within the
 * vector is unspecified (associative math).
 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
 * produce unspecified results.
 *
 * A bisection method is used: at each step, the vector from the previous step
 * is partitioned in half, and the operation is performed on the two halves.
 * This takes log2(n) steps where n is the number of elements in the vector.
 */
9339 
/// Reduction: sum of the eight 64-bit elements of \a __W.
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
  return __builtin_reduce_add((__v8di)__W);
}

/// Reduction: product of the eight 64-bit elements of \a __W.
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
  return __builtin_reduce_mul((__v8di)__W);
}

/// Reduction: bitwise AND of the eight 64-bit elements of \a __W.
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
  return __builtin_reduce_and((__v8di)__W);
}

/// Reduction: bitwise OR of the eight 64-bit elements of \a __W.
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
  return __builtin_reduce_or((__v8di)__W);
}
9355 
/// Masked reduction sum: lanes not selected by \a __M are replaced with 0
/// (the additive identity) before summing.
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
  __W = _mm512_maskz_mov_epi64(__M, __W);
  return __builtin_reduce_add((__v8di)__W);
}

/// Masked reduction product: lanes not selected by \a __M are replaced with 1
/// (the multiplicative identity) before multiplying.
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
  return __builtin_reduce_mul((__v8di)__W);
}

/// Masked reduction AND: lanes not selected by \a __M are replaced with
/// all-ones (the AND identity) before reducing.
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);
  return __builtin_reduce_and((__v8di)__W);
}

/// Masked reduction OR: lanes not selected by \a __M are zeroed (the OR
/// identity) before reducing.
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
  __W = _mm512_maskz_mov_epi64(__M, __W);
  return __builtin_reduce_or((__v8di)__W);
}
9379 
// -0.0 is used to ignore the start value since it is the neutral value of
// floating point addition. For more information, please refer to
// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
/// Reduction: sum of the eight double-precision elements of \a __W
/// (association order unspecified; see the comment block above).
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}

/// Reduction: product of the eight double-precision elements of \a __W,
/// started from the multiplicative identity 1.0.
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
}

/// Masked reduction sum: lanes not selected by \a __M are zeroed first.
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
  __W = _mm512_maskz_mov_pd(__M, __W);
  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}

/// Masked reduction product: lanes not selected by \a __M are set to 1.0 first.
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
  __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
}
9402 
/// Reduction: sum of the sixteen 32-bit elements of \a __W.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_add_epi32(__m512i __W) {
  return __builtin_reduce_add((__v16si)__W);
}

/// Reduction: product of the sixteen 32-bit elements of \a __W.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_mul_epi32(__m512i __W) {
  return __builtin_reduce_mul((__v16si)__W);
}

/// Reduction: bitwise AND of the sixteen 32-bit elements of \a __W.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_and_epi32(__m512i __W) {
  return __builtin_reduce_and((__v16si)__W);
}

/// Reduction: bitwise OR of the sixteen 32-bit elements of \a __W.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_or_epi32(__m512i __W) {
  return __builtin_reduce_or((__v16si)__W);
}
9422 
/// Masked reduction sum: lanes not selected by \a __M are zeroed first.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
  __W = _mm512_maskz_mov_epi32(__M, __W);
  return __builtin_reduce_add((__v16si)__W);
}

/// Masked reduction product: lanes not selected by \a __M are set to 1 first.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
  return __builtin_reduce_mul((__v16si)__W);
}

/// Masked reduction AND: lanes not selected by \a __M are set to all-ones
/// first.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);
  return __builtin_reduce_and((__v16si)__W);
}

/// Masked reduction OR: lanes not selected by \a __M are zeroed first.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
  __W = _mm512_maskz_mov_epi32(__M, __W);
  return __builtin_reduce_or((__v16si)__W);
}
9446 
/// Reduction: sum of the sixteen single-precision elements of \a __W
/// (-0.0f is the neutral start value; association order unspecified).
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_add_ps(__m512 __W) {
  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
}

/// Reduction: product of the sixteen single-precision elements of \a __W,
/// started from the multiplicative identity 1.0f.
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_mul_ps(__m512 __W) {
  return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
}

/// Masked reduction sum: lanes not selected by \a __M are zeroed first.
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
  __W = _mm512_maskz_mov_ps(__M, __W);
  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
}

/// Masked reduction product: lanes not selected by \a __M are set to 1.0f
/// first.
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
  __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
  return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
}
9468 
/// Reduction: signed maximum of the eight 64-bit elements of \a __V.
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epi64(__m512i __V) {
  return __builtin_reduce_max((__v8di)__V);
}

/// Reduction: unsigned maximum of the eight 64-bit elements of \a __V.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epu64(__m512i __V) {
  return __builtin_reduce_max((__v8du)__V);
}

/// Reduction: signed minimum of the eight 64-bit elements of \a __V.
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_reduce_min_epi64(__m512i __V) {
  return __builtin_reduce_min((__v8di)__V);
}

/// Reduction: unsigned minimum of the eight 64-bit elements of \a __V.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
_mm512_reduce_min_epu64(__m512i __V) {
  return __builtin_reduce_min((__v8du)__V);
}
9488 
/// Masked signed maximum: lanes not selected by \a __M are filled with
/// LLONG_MIN (the identity for signed max) before reducing.
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
  return __builtin_reduce_max((__v8di)__V);
}

/// Masked unsigned maximum: lanes not selected by \a __M are zeroed
/// (the identity for unsigned max) before reducing.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
  __V = _mm512_maskz_mov_epi64(__M, __V);
  return __builtin_reduce_max((__v8du)__V);
}

/// Masked signed minimum: lanes not selected by \a __M are filled with
/// LLONG_MAX (the identity for signed min) before reducing.
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
  return __builtin_reduce_min((__v8di)__V);
}

/// Masked unsigned minimum: lanes not selected by \a __M are filled with
/// all-ones (UINT64_MAX, the identity for unsigned min) before reducing.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
  return __builtin_reduce_min((__v8du)__V);
}
/// Reduction: signed maximum of the sixteen 32-bit elements of \a __V.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epi32(__m512i __V) {
  return __builtin_reduce_max((__v16si)__V);
}

/// Reduction: unsigned maximum of the sixteen 32-bit elements of \a __V.
static __inline__ unsigned int __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epu32(__m512i __V) {
  return __builtin_reduce_max((__v16su)__V);
}

/// Reduction: signed minimum of the sixteen 32-bit elements of \a __V.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_min_epi32(__m512i __V) {
  return __builtin_reduce_min((__v16si)__V);
}

/// Reduction: unsigned minimum of the sixteen 32-bit elements of \a __V.
static __inline__ unsigned int __DEFAULT_FN_ATTRS512
_mm512_reduce_min_epu32(__m512i __V) {
  return __builtin_reduce_min((__v16su)__V);
}
9531 
/// Masked signed maximum: lanes not selected by \a __M are filled with
/// INT_MIN (the identity for signed max) before reducing.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
  return __builtin_reduce_max((__v16si)__V);
}

/// Masked unsigned maximum: lanes not selected by \a __M are zeroed
/// (the identity for unsigned max) before reducing.
static __inline__ unsigned int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
  __V = _mm512_maskz_mov_epi32(__M, __V);
  return __builtin_reduce_max((__v16su)__V);
}

/// Masked signed minimum: lanes not selected by \a __M are filled with
/// INT_MAX (the identity for signed min) before reducing.
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
  return __builtin_reduce_min((__v16si)__V);
}

/// Masked unsigned minimum: lanes not selected by \a __M are filled with
/// all-ones (UINT_MAX, the identity for unsigned min) before reducing.
static __inline__ unsigned int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);
  return __builtin_reduce_min((__v16su)__V);
}
9555 
/// Reduction: maximum of the eight double-precision elements of \a __V
/// (NaN / -0.0 handling unspecified; see the reduction comment block above).
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_reduce_max_pd(__m512d __V) {
  return __builtin_ia32_reduce_fmax_pd512(__V);
}

/// Reduction: minimum of the eight double-precision elements of \a __V.
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_reduce_min_pd(__m512d __V) {
  return __builtin_ia32_reduce_fmin_pd512(__V);
}

/// Masked maximum: lanes not selected by \a __M are filled with -infinity
/// (the identity for max) before reducing.
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
  __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
  return __builtin_ia32_reduce_fmax_pd512(__V);
}

/// Masked minimum: lanes not selected by \a __M are filled with +infinity
/// (the identity for min) before reducing.
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
  __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
  return __builtin_ia32_reduce_fmin_pd512(__V);
}
9577 
/// Reduction: maximum of the sixteen single-precision elements of \a __V
/// (NaN / -0.0 handling unspecified; see the reduction comment block above).
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_max_ps(__m512 __V) {
  return __builtin_ia32_reduce_fmax_ps512(__V);
}

/// Reduction: minimum of the sixteen single-precision elements of \a __V.
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_min_ps(__m512 __V) {
  return __builtin_ia32_reduce_fmin_ps512(__V);
}

/// Masked maximum: lanes not selected by \a __M are filled with -infinity
/// (the identity for max) before reducing.
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
  __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
  return __builtin_ia32_reduce_fmax_ps512(__V);
}

/// Masked minimum: lanes not selected by \a __M are filled with +infinity
/// (the identity for min) before reducing.
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
  __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
  return __builtin_ia32_reduce_fmin_ps512(__V);
}
9599 
9600 /// Moves the least significant 32 bits of a vector of [16 x i32] to a
9601 ///    32-bit signed integer value.
9602 ///
9603 /// \headerfile <x86intrin.h>
9604 ///
9605 /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9606 ///
9607 /// \param __A
9608 ///    A vector of [16 x i32]. The least significant 32 bits are moved to the
9609 ///    destination.
9610 /// \returns A 32-bit signed integer containing the moved value.
9611 static __inline__ int __DEFAULT_FN_ATTRS512
9612 _mm512_cvtsi512_si32(__m512i __A) {
9613   __v16si __b = (__v16si)__A;
9614   return __b[0];
9615 }
9616 
/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
/// locations starting at location \a base_addr at packed 32-bit integer indices
/// stored in the lower half of \a vindex scaled by \a scale, and stores them in
/// dst.
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \note \a scale must be 1, 2, 4, or 8.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_pd(vindex, base_addr, scale)                        \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9634 
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored in
/// the lower half of \a vindex scaled by \a scale into dst using writemask
/// \a mask (elements are copied from \a src when the corresponding mask bit is
/// not set).
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \note \a scale must be 1, 2, 4, or 8.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)        \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),      \
                           (base_addr), (scale))
9659 
/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
/// at packed 32-bit integer indices stored in the lower half of \a vindex
/// scaled by \a scale and stores them in dst.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \note \a scale must be 1, 2, 4, or 8.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_epi64(vindex, base_addr, scale)                     \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9677 
/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
/// at packed 32-bit integer indices stored in the lower half of \a vindex
/// scaled by \a scale and stores them in dst using writemask \a mask (elements
/// are copied from \a src when the corresponding mask bit is not set).
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \note \a scale must be 1, 2, 4, or 8.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale)     \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex),   \
                              (base_addr), (scale))
9701 
/// Stores the 8 packed double-precision (64-bit) floating-point elements in
/// \a v1 to memory locations starting at location \a base_addr at packed
/// 32-bit integer indices stored in \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \note \a scale must be 1, 2, 4, or 8.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale)                   \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9718 
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in \a vindex scaled by \a scale. Only those elements
/// whose corresponding mask bit is set in writemask \a mask are written to
/// memory.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \note \a scale must be 1, 2, 4, or 8.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale)        \
  _mm512_mask_i32scatter_pd((base_addr), (mask),                               \
                            _mm512_castsi512_si256(vindex), (v1), (scale))
9740 
/// Stores the 8 packed 64-bit integer elements in \a v1 to memory locations
/// starting at location \a base_addr at packed 32-bit integer indices stored
/// in \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \note \a scale must be 1, 2, 4, or 8.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale)                \
  _mm512_i32scatter_epi64((base_addr),                                         \
                          _mm512_castsi512_si256(vindex), (v1), (scale))
9758 
/// Stores the 8 packed 64-bit integer elements in \a v1 to memory locations
/// starting at location \a base_addr at packed 32-bit integer indices stored
/// in \a vindex scaled by \a scale, using writemask \a mask (elements whose
/// corresponding mask bit is not set are not written to memory).
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \note \a scale must be 1, 2, 4, or 8.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale)     \
  _mm512_mask_i32scatter_epi64((base_addr), (mask),                            \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
9779 
9780 #undef __DEFAULT_FN_ATTRS512
9781 #undef __DEFAULT_FN_ATTRS128
9782 #undef __DEFAULT_FN_ATTRS
9783 #undef __DEFAULT_FN_ATTRS512_CONSTEXPR
9784 #undef __DEFAULT_FN_ATTRS128_CONSTEXPR
9785 #undef __DEFAULT_FN_ATTRS_CONSTEXPR
9786 
9787 #endif /* __AVX512FINTRIN_H */
9788