xref: /netbsd-src/external/gpl3/gcc/dist/gcc/config/i386/avx512fp16intrin.h (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Copyright (C) 2019-2022 Free Software Foundation, Inc.
2 
3    This file is part of GCC.
4 
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9 
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18 
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23 
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
26 #endif
27 
28 #ifndef __AVX512FP16INTRIN_H_INCLUDED
29 #define __AVX512FP16INTRIN_H_INCLUDED
30 
31 #ifndef __AVX512FP16__
32 #pragma GCC push_options
33 #pragma GCC target("avx512fp16")
34 #define __DISABLE_AVX512FP16__
35 #endif /* __AVX512FP16__ */
36 
37 /* Internal data types for implementing the intrinsics.  */
38 typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
39 typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
40 typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
41 
42 /* The Intel API is flexible enough that we must allow aliasing with other
43    vector types, and their scalar components.  */
44 typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
45 typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
46 typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
47 
48 /* Unaligned version of the same type.  */
49 typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16),	\
50 					   __may_alias__, __aligned__ (1)));
51 typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32),	\
52 					   __may_alias__, __aligned__ (1)));
53 typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64),	\
54 					   __may_alias__, __aligned__ (1)));
55 
56 extern __inline __m128h
57 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ph(_Float16 __A7,_Float16 __A6,_Float16 __A5,_Float16 __A4,_Float16 __A3,_Float16 __A2,_Float16 __A1,_Float16 __A0)58 _mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5,
59 	    _Float16 __A4, _Float16 __A3, _Float16 __A2,
60 	    _Float16 __A1, _Float16 __A0)
61 {
62   return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3,
63 					  __A4, __A5, __A6, __A7 };
64 }
65 
66 extern __inline __m256h
67 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_ph(_Float16 __A15,_Float16 __A14,_Float16 __A13,_Float16 __A12,_Float16 __A11,_Float16 __A10,_Float16 __A9,_Float16 __A8,_Float16 __A7,_Float16 __A6,_Float16 __A5,_Float16 __A4,_Float16 __A3,_Float16 __A2,_Float16 __A1,_Float16 __A0)68 _mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13,
69 	       _Float16 __A12, _Float16 __A11, _Float16 __A10,
70 	       _Float16 __A9, _Float16 __A8, _Float16 __A7,
71 	       _Float16 __A6, _Float16 __A5, _Float16 __A4,
72 	       _Float16 __A3, _Float16 __A2, _Float16 __A1,
73 	       _Float16 __A0)
74 {
75   return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3,
76 					   __A4, __A5, __A6, __A7,
77 					   __A8, __A9, __A10, __A11,
78 					   __A12, __A13, __A14, __A15 };
79 }
80 
81 extern __inline __m512h
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ph(_Float16 __A31,_Float16 __A30,_Float16 __A29,_Float16 __A28,_Float16 __A27,_Float16 __A26,_Float16 __A25,_Float16 __A24,_Float16 __A23,_Float16 __A22,_Float16 __A21,_Float16 __A20,_Float16 __A19,_Float16 __A18,_Float16 __A17,_Float16 __A16,_Float16 __A15,_Float16 __A14,_Float16 __A13,_Float16 __A12,_Float16 __A11,_Float16 __A10,_Float16 __A9,_Float16 __A8,_Float16 __A7,_Float16 __A6,_Float16 __A5,_Float16 __A4,_Float16 __A3,_Float16 __A2,_Float16 __A1,_Float16 __A0)83 _mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
84 	       _Float16 __A28, _Float16 __A27, _Float16 __A26,
85 	       _Float16 __A25, _Float16 __A24, _Float16 __A23,
86 	       _Float16 __A22, _Float16 __A21, _Float16 __A20,
87 	       _Float16 __A19, _Float16 __A18, _Float16 __A17,
88 	       _Float16 __A16, _Float16 __A15, _Float16 __A14,
89 	       _Float16 __A13, _Float16 __A12, _Float16 __A11,
90 	       _Float16 __A10, _Float16 __A9, _Float16 __A8,
91 	       _Float16 __A7, _Float16 __A6, _Float16 __A5,
92 	       _Float16 __A4, _Float16 __A3, _Float16 __A2,
93 	       _Float16 __A1, _Float16 __A0)
94 {
95   return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
96 					   __A4, __A5, __A6, __A7,
97 					   __A8, __A9, __A10, __A11,
98 					   __A12, __A13, __A14, __A15,
99 					   __A16, __A17, __A18, __A19,
100 					   __A20, __A21, __A22, __A23,
101 					   __A24, __A25, __A26, __A27,
102 					   __A28, __A29, __A30, __A31 };
103 }
104 
105 /* Create vectors of elements in the reversed order from _mm_set_ph,
106    _mm256_set_ph and _mm512_set_ph functions.  */
107 
108 extern __inline __m128h
109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_ph(_Float16 __A0,_Float16 __A1,_Float16 __A2,_Float16 __A3,_Float16 __A4,_Float16 __A5,_Float16 __A6,_Float16 __A7)110 _mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
111 	     _Float16 __A3, _Float16 __A4, _Float16 __A5,
112 	     _Float16 __A6, _Float16 __A7)
113 {
114   return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0);
115 }
116 
117 extern __inline __m256h
118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_ph(_Float16 __A0,_Float16 __A1,_Float16 __A2,_Float16 __A3,_Float16 __A4,_Float16 __A5,_Float16 __A6,_Float16 __A7,_Float16 __A8,_Float16 __A9,_Float16 __A10,_Float16 __A11,_Float16 __A12,_Float16 __A13,_Float16 __A14,_Float16 __A15)119 _mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
120 		_Float16 __A3, _Float16 __A4, _Float16 __A5,
121 		_Float16 __A6, _Float16 __A7, _Float16 __A8,
122 		_Float16 __A9, _Float16 __A10, _Float16 __A11,
123 		_Float16 __A12, _Float16 __A13, _Float16 __A14,
124 		_Float16 __A15)
125 {
126   return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9,
127 			__A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1,
128 			__A0);
129 }
130 
131 extern __inline __m512h
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setr_ph(_Float16 __A0,_Float16 __A1,_Float16 __A2,_Float16 __A3,_Float16 __A4,_Float16 __A5,_Float16 __A6,_Float16 __A7,_Float16 __A8,_Float16 __A9,_Float16 __A10,_Float16 __A11,_Float16 __A12,_Float16 __A13,_Float16 __A14,_Float16 __A15,_Float16 __A16,_Float16 __A17,_Float16 __A18,_Float16 __A19,_Float16 __A20,_Float16 __A21,_Float16 __A22,_Float16 __A23,_Float16 __A24,_Float16 __A25,_Float16 __A26,_Float16 __A27,_Float16 __A28,_Float16 __A29,_Float16 __A30,_Float16 __A31)133 _mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
134 		_Float16 __A3, _Float16 __A4, _Float16 __A5,
135 		_Float16 __A6, _Float16 __A7, _Float16 __A8,
136 		_Float16 __A9, _Float16 __A10, _Float16 __A11,
137 		_Float16 __A12, _Float16 __A13, _Float16 __A14,
138 		_Float16 __A15, _Float16 __A16, _Float16 __A17,
139 		_Float16 __A18, _Float16 __A19, _Float16 __A20,
140 		_Float16 __A21, _Float16 __A22, _Float16 __A23,
141 		_Float16 __A24, _Float16 __A25, _Float16 __A26,
142 		_Float16 __A27, _Float16 __A28, _Float16 __A29,
143 		_Float16 __A30, _Float16 __A31)
144 
145 {
146   return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
147 			__A24, __A23, __A22, __A21, __A20, __A19, __A18,
148 			__A17, __A16, __A15, __A14, __A13, __A12, __A11,
149 			__A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
150 			__A2, __A1, __A0);
151 }
152 
153 /* Broadcast _Float16 to vector.  */
154 
155 extern __inline __m128h
156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_ph(_Float16 __A)157 _mm_set1_ph (_Float16 __A)
158 {
159   return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A);
160 }
161 
162 extern __inline __m256h
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ph(_Float16 __A)164 _mm256_set1_ph (_Float16 __A)
165 {
166   return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
167 			__A, __A, __A, __A, __A, __A, __A, __A);
168 }
169 
170 extern __inline __m512h
171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ph(_Float16 __A)172 _mm512_set1_ph (_Float16 __A)
173 {
174   return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
175 			__A, __A, __A, __A, __A, __A, __A, __A,
176 			__A, __A, __A, __A, __A, __A, __A, __A,
177 			__A, __A, __A, __A, __A, __A, __A, __A);
178 }
179 
180 /* Create a vector with all zeros.  */
181 
182 extern __inline __m128h
183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ph(void)184 _mm_setzero_ph (void)
185 {
186   return _mm_set1_ph (0.0f);
187 }
188 
189 extern __inline __m256h
190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_ph(void)191 _mm256_setzero_ph (void)
192 {
193   return _mm256_set1_ph (0.0f);
194 }
195 
196 extern __inline __m512h
197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ph(void)198 _mm512_setzero_ph (void)
199 {
200   return _mm512_set1_ph (0.0f);
201 }
202 
203 extern __inline __m128h
204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_ph(void)205 _mm_undefined_ph (void)
206 {
207 #pragma GCC diagnostic push
208 #pragma GCC diagnostic ignored "-Winit-self"
209   __m128h __Y = __Y;
210 #pragma GCC diagnostic pop
211   return __Y;
212 }
213 
214 extern __inline __m256h
215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_undefined_ph(void)216 _mm256_undefined_ph (void)
217 {
218 #pragma GCC diagnostic push
219 #pragma GCC diagnostic ignored "-Winit-self"
220   __m256h __Y = __Y;
221 #pragma GCC diagnostic pop
222   return __Y;
223 }
224 
225 extern __inline __m512h
226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ph(void)227 _mm512_undefined_ph (void)
228 {
229 #pragma GCC diagnostic push
230 #pragma GCC diagnostic ignored "-Winit-self"
231   __m512h __Y = __Y;
232 #pragma GCC diagnostic pop
233   return __Y;
234 }
235 
236 extern __inline _Float16
237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_h(__m128h __A)238 _mm_cvtsh_h (__m128h __A)
239 {
240   return __A[0];
241 }
242 
243 extern __inline _Float16
244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsh_h(__m256h __A)245 _mm256_cvtsh_h (__m256h __A)
246 {
247   return __A[0];
248 }
249 
250 extern __inline _Float16
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsh_h(__m512h __A)252 _mm512_cvtsh_h (__m512h __A)
253 {
254   return __A[0];
255 }
256 
257 extern __inline __m512
258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_ps(__m512h __a)259 _mm512_castph_ps (__m512h __a)
260 {
261   return (__m512) __a;
262 }
263 
264 extern __inline __m512d
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_pd(__m512h __a)266 _mm512_castph_pd (__m512h __a)
267 {
268   return (__m512d) __a;
269 }
270 
271 extern __inline __m512i
272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_si512(__m512h __a)273 _mm512_castph_si512 (__m512h __a)
274 {
275   return (__m512i) __a;
276 }
277 
278 extern __inline __m128h
279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph128(__m512h __A)280 _mm512_castph512_ph128 (__m512h __A)
281 {
282   union
283   {
284     __m128h __a[4];
285     __m512h __v;
286   } __u = { .__v = __A };
287   return __u.__a[0];
288 }
289 
290 extern __inline __m256h
291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph256(__m512h __A)292 _mm512_castph512_ph256 (__m512h __A)
293 {
294   union
295   {
296     __m256h __a[2];
297     __m512h __v;
298   } __u = { .__v = __A };
299   return __u.__a[0];
300 }
301 
302 extern __inline __m512h
303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph128_ph512(__m128h __A)304 _mm512_castph128_ph512 (__m128h __A)
305 {
306   union
307   {
308     __m128h __a[4];
309     __m512h __v;
310   } __u;
311   __u.__a[0] = __A;
312   return __u.__v;
313 }
314 
315 extern __inline __m512h
316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph256_ph512(__m256h __A)317 _mm512_castph256_ph512 (__m256h __A)
318 {
319   union
320   {
321     __m256h __a[2];
322     __m512h __v;
323   } __u;
324   __u.__a[0] = __A;
325   return __u.__v;
326 }
327 
328 extern __inline __m512h
329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph128_ph512(__m128h __A)330 _mm512_zextph128_ph512 (__m128h __A)
331 {
332   return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
333 				       (__m128) __A, 0);
334 }
335 
336 extern __inline __m512h
337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph256_ph512(__m256h __A)338 _mm512_zextph256_ph512 (__m256h __A)
339 {
340   return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
341 				       (__m256d) __A, 0);
342 }
343 
344 extern __inline __m512h
345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_ph(__m512 __a)346 _mm512_castps_ph (__m512 __a)
347 {
348   return (__m512h) __a;
349 }
350 
351 extern __inline __m512h
352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_ph(__m512d __a)353 _mm512_castpd_ph (__m512d __a)
354 {
355   return (__m512h) __a;
356 }
357 
358 extern __inline __m512h
359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_ph(__m512i __a)360 _mm512_castsi512_ph (__m512i __a)
361 {
362   return (__m512h) __a;
363 }
364 
365 /* Create a vector with element 0 as F and the rest zero.  */
366 extern __inline __m128h
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_sh(_Float16 __F)368 _mm_set_sh (_Float16 __F)
369 {
370   return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F);
371 }
372 
373 /* Create a vector with element 0 as *P and the rest zero.  */
374 extern __inline __m128h
375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sh(void const * __P)376 _mm_load_sh (void const *__P)
377 {
378   return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
379 		     *(_Float16 const *) __P);
380 }
381 
382 extern __inline __m512h
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ph(void const * __P)384 _mm512_load_ph (void const *__P)
385 {
386   return *(const __m512h *) __P;
387 }
388 
389 extern __inline __m256h
390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ph(void const * __P)391 _mm256_load_ph (void const *__P)
392 {
393   return *(const __m256h *) __P;
394 }
395 
396 extern __inline __m128h
397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ph(void const * __P)398 _mm_load_ph (void const *__P)
399 {
400   return *(const __m128h *) __P;
401 }
402 
403 extern __inline __m512h
404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_ph(void const * __P)405 _mm512_loadu_ph (void const *__P)
406 {
407   return *(const __m512h_u *) __P;
408 }
409 
410 extern __inline __m256h
411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ph(void const * __P)412 _mm256_loadu_ph (void const *__P)
413 {
414   return *(const __m256h_u *) __P;
415 }
416 
417 extern __inline __m128h
418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_ph(void const * __P)419 _mm_loadu_ph (void const *__P)
420 {
421   return *(const __m128h_u *) __P;
422 }
423 
424 /* Stores the lower _Float16 value.  */
425 extern __inline void
426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sh(void * __P,__m128h __A)427 _mm_store_sh (void *__P, __m128h __A)
428 {
429   *(_Float16 *) __P = ((__v8hf)__A)[0];
430 }
431 
432 extern __inline void
433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ph(void * __P,__m512h __A)434 _mm512_store_ph (void *__P, __m512h __A)
435 {
436    *(__m512h *) __P = __A;
437 }
438 
439 extern __inline void
440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ph(void * __P,__m256h __A)441 _mm256_store_ph (void *__P, __m256h __A)
442 {
443    *(__m256h *) __P = __A;
444 }
445 
446 extern __inline void
447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ph(void * __P,__m128h __A)448 _mm_store_ph (void *__P, __m128h __A)
449 {
450    *(__m128h *) __P = __A;
451 }
452 
453 extern __inline void
454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_ph(void * __P,__m512h __A)455 _mm512_storeu_ph (void *__P, __m512h __A)
456 {
457    *(__m512h_u *) __P = __A;
458 }
459 
460 extern __inline void
461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ph(void * __P,__m256h __A)462 _mm256_storeu_ph (void *__P, __m256h __A)
463 {
464    *(__m256h_u *) __P = __A;
465 }
466 
467 extern __inline void
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_ph(void * __P,__m128h __A)469 _mm_storeu_ph (void *__P, __m128h __A)
470 {
471    *(__m128h_u *) __P = __A;
472 }
473 
474 extern __inline __m512h
475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_abs_ph(__m512h __A)476 _mm512_abs_ph (__m512h __A)
477 {
478   return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
479 				      (__m512i) __A);
480 }
481 
482 /* Intrinsics v[add,sub,mul,div]ph.  */
483 extern __inline __m512h
484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_ph(__m512h __A,__m512h __B)485 _mm512_add_ph (__m512h __A, __m512h __B)
486 {
487   return (__m512h) ((__v32hf) __A + (__v32hf) __B);
488 }
489 
490 extern __inline __m512h
491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)492 _mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
493 {
494   return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
495 }
496 
497 extern __inline __m512h
498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_ph(__mmask32 __A,__m512h __B,__m512h __C)499 _mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
500 {
501   return __builtin_ia32_addph512_mask (__B, __C,
502 				       _mm512_setzero_ph (), __A);
503 }
504 
505 extern __inline __m512h
506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_ph(__m512h __A,__m512h __B)507 _mm512_sub_ph (__m512h __A, __m512h __B)
508 {
509   return (__m512h) ((__v32hf) __A - (__v32hf) __B);
510 }
511 
512 extern __inline __m512h
513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)514 _mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
515 {
516   return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
517 }
518 
519 extern __inline __m512h
520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_ph(__mmask32 __A,__m512h __B,__m512h __C)521 _mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
522 {
523   return __builtin_ia32_subph512_mask (__B, __C,
524 				       _mm512_setzero_ph (), __A);
525 }
526 
527 extern __inline __m512h
528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_ph(__m512h __A,__m512h __B)529 _mm512_mul_ph (__m512h __A, __m512h __B)
530 {
531   return (__m512h) ((__v32hf) __A * (__v32hf) __B);
532 }
533 
534 extern __inline __m512h
535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)536 _mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
537 {
538   return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
539 }
540 
541 extern __inline __m512h
542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_ph(__mmask32 __A,__m512h __B,__m512h __C)543 _mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
544 {
545   return __builtin_ia32_mulph512_mask (__B, __C,
546 				       _mm512_setzero_ph (), __A);
547 }
548 
549 extern __inline __m512h
550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_ph(__m512h __A,__m512h __B)551 _mm512_div_ph (__m512h __A, __m512h __B)
552 {
553   return (__m512h) ((__v32hf) __A / (__v32hf) __B);
554 }
555 
556 extern __inline __m512h
557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)558 _mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
559 {
560   return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
561 }
562 
563 extern __inline __m512h
564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_ph(__mmask32 __A,__m512h __B,__m512h __C)565 _mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
566 {
567   return __builtin_ia32_divph512_mask (__B, __C,
568 				       _mm512_setzero_ph (), __A);
569 }
570 
571 #ifdef __OPTIMIZE__
572 extern __inline __m512h
573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ph(__m512h __A,__m512h __B,const int __C)574 _mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
575 {
576   return __builtin_ia32_addph512_mask_round (__A, __B,
577 					     _mm512_setzero_ph (),
578 					     (__mmask32) -1, __C);
579 }
580 
581 extern __inline __m512h
582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)583 _mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
584 			  __m512h __D, const int __E)
585 {
586   return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
587 }
588 
589 extern __inline __m512h
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)591 _mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
592 			   const int __D)
593 {
594   return __builtin_ia32_addph512_mask_round (__B, __C,
595 					     _mm512_setzero_ph (),
596 					     __A, __D);
597 }
598 
599 extern __inline __m512h
600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ph(__m512h __A,__m512h __B,const int __C)601 _mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
602 {
603   return __builtin_ia32_subph512_mask_round (__A, __B,
604 					     _mm512_setzero_ph (),
605 					     (__mmask32) -1, __C);
606 }
607 
608 extern __inline __m512h
609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)610 _mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
611 			  __m512h __D, const int __E)
612 {
613   return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
614 }
615 
616 extern __inline __m512h
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)618 _mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
619 			   const int __D)
620 {
621   return __builtin_ia32_subph512_mask_round (__B, __C,
622 					     _mm512_setzero_ph (),
623 					     __A, __D);
624 }
625 
626 extern __inline __m512h
627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ph(__m512h __A,__m512h __B,const int __C)628 _mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
629 {
630   return __builtin_ia32_mulph512_mask_round (__A, __B,
631 					     _mm512_setzero_ph (),
632 					     (__mmask32) -1, __C);
633 }
634 
635 extern __inline __m512h
636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)637 _mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
638 			  __m512h __D, const int __E)
639 {
640   return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
641 }
642 
643 extern __inline __m512h
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)645 _mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
646 			   const int __D)
647 {
648   return __builtin_ia32_mulph512_mask_round (__B, __C,
649 					     _mm512_setzero_ph (),
650 					     __A, __D);
651 }
652 
653 extern __inline __m512h
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ph(__m512h __A,__m512h __B,const int __C)655 _mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
656 {
657   return __builtin_ia32_divph512_mask_round (__A, __B,
658 					     _mm512_setzero_ph (),
659 					     (__mmask32) -1, __C);
660 }
661 
662 extern __inline __m512h
663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)664 _mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
665 			  __m512h __D, const int __E)
666 {
667   return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
668 }
669 
670 extern __inline __m512h
671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)672 _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
673 			   const int __D)
674 {
675   return __builtin_ia32_divph512_mask_round (__B, __C,
676 					     _mm512_setzero_ph (),
677 					     __A, __D);
678 }
679 #else
680 #define _mm512_add_round_ph(A, B, C)					\
681   ((__m512h)__builtin_ia32_addph512_mask_round((A), (B),		\
682 					       _mm512_setzero_ph (),	\
683 					       (__mmask32)-1, (C)))
684 
685 #define _mm512_mask_add_round_ph(A, B, C, D, E)				\
686   ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
687 
688 #define _mm512_maskz_add_round_ph(A, B, C, D)				\
689   ((__m512h)__builtin_ia32_addph512_mask_round((B), (C),		\
690 					       _mm512_setzero_ph (),	\
691 					       (A), (D)))
692 
693 #define _mm512_sub_round_ph(A, B, C)					\
694   ((__m512h)__builtin_ia32_subph512_mask_round((A), (B),		\
695 					       _mm512_setzero_ph (),	\
696 					       (__mmask32)-1, (C)))
697 
698 #define _mm512_mask_sub_round_ph(A, B, C, D, E)				\
699   ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
700 
701 #define _mm512_maskz_sub_round_ph(A, B, C, D)				\
702   ((__m512h)__builtin_ia32_subph512_mask_round((B), (C),		\
703 					       _mm512_setzero_ph (),	\
704 					       (A), (D)))
705 
706 #define _mm512_mul_round_ph(A, B, C)					\
707   ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B),		\
708 					       _mm512_setzero_ph (),	\
709 					       (__mmask32)-1, (C)))
710 
711 #define _mm512_mask_mul_round_ph(A, B, C, D, E)				\
712   ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
713 
714 #define _mm512_maskz_mul_round_ph(A, B, C, D)				\
715   ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C),		\
716 					       _mm512_setzero_ph (),	\
717 					       (A), (D)))
718 
719 #define _mm512_div_round_ph(A, B, C)					\
720   ((__m512h)__builtin_ia32_divph512_mask_round((A), (B),		\
721 					       _mm512_setzero_ph (),	\
722 					       (__mmask32)-1, (C)))
723 
724 #define _mm512_mask_div_round_ph(A, B, C, D, E)				\
725   ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
726 
727 #define _mm512_maskz_div_round_ph(A, B, C, D)				\
728   ((__m512h)__builtin_ia32_divph512_mask_round((B), (C),		\
729 					       _mm512_setzero_ph (),	\
730 					       (A), (D)))
731 #endif  /* __OPTIMIZE__  */
732 
733 extern __inline __m512h
734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_conj_pch(__m512h __A)735 _mm512_conj_pch (__m512h __A)
736 {
737   return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
738 }
739 
740 extern __inline __m512h
741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conj_pch(__m512h __W,__mmask16 __U,__m512h __A)742 _mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
743 {
744   return (__m512h)
745     __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
746 				   (__v16sf) __W,
747 				   (__mmask16) __U);
748 }
749 
750 extern __inline __m512h
751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conj_pch(__mmask16 __U,__m512h __A)752 _mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
753 {
754   return (__m512h)
755     __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
756 				   (__v16sf) _mm512_setzero_ps (),
757 				   (__mmask16) __U);
758 }
759 
760 /* Intrinsics of v[add,sub,mul,div]sh.  */
761 extern __inline __m128h
762   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_sh(__m128h __A,__m128h __B)763 _mm_add_sh (__m128h __A, __m128h __B)
764 {
765   __A[0] += __B[0];
766   return __A;
767 }
768 
769 extern __inline __m128h
770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)771 _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
772 {
773   return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
774 }
775 
776 extern __inline __m128h
777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_sh(__mmask8 __A,__m128h __B,__m128h __C)778 _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
779 {
780   return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
781 				    __A);
782 }
783 
784 extern __inline __m128h
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_sh(__m128h __A,__m128h __B)786 _mm_sub_sh (__m128h __A, __m128h __B)
787 {
788   __A[0] -= __B[0];
789   return __A;
790 }
791 
792 extern __inline __m128h
793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)794 _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
795 {
796   return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
797 }
798 
799 extern __inline __m128h
800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_sh(__mmask8 __A,__m128h __B,__m128h __C)801 _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
802 {
803   return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
804 				    __A);
805 }
806 
807 extern __inline __m128h
808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_sh(__m128h __A,__m128h __B)809 _mm_mul_sh (__m128h __A, __m128h __B)
810 {
811   __A[0] *= __B[0];
812   return __A;
813 }
814 
815 extern __inline __m128h
816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)817 _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
818 {
819   return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
820 }
821 
822 extern __inline __m128h
823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_sh(__mmask8 __A,__m128h __B,__m128h __C)824 _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
825 {
826   return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
827 }
828 
829 extern __inline __m128h
830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_sh(__m128h __A,__m128h __B)831 _mm_div_sh (__m128h __A, __m128h __B)
832 {
833   __A[0] /= __B[0];
834   return __A;
835 }
836 
837 extern __inline __m128h
838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)839 _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
840 {
841   return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
842 }
843 
844 extern __inline __m128h
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_sh(__mmask8 __A,__m128h __B,__m128h __C)846 _mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
847 {
848   return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
849 				    __A);
850 }
851 
852 #ifdef __OPTIMIZE__
853 extern __inline __m128h
854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sh(__m128h __A,__m128h __B,const int __C)855 _mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
856 {
857   return __builtin_ia32_addsh_mask_round (__A, __B,
858 					  _mm_setzero_ph (),
859 					  (__mmask8) -1, __C);
860 }
861 
862 extern __inline __m128h
863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)864 _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
865 		       __m128h __D, const int __E)
866 {
867   return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
868 }
869 
870 extern __inline __m128h
871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)872 _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
873 			const int __D)
874 {
875   return __builtin_ia32_addsh_mask_round (__B, __C,
876 					  _mm_setzero_ph (),
877 					  __A, __D);
878 }
879 
880 extern __inline __m128h
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sh(__m128h __A,__m128h __B,const int __C)882 _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
883 {
884   return __builtin_ia32_subsh_mask_round (__A, __B,
885 					  _mm_setzero_ph (),
886 					  (__mmask8) -1, __C);
887 }
888 
889 extern __inline __m128h
890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)891 _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
892 		       __m128h __D, const int __E)
893 {
894   return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
895 }
896 
897 extern __inline __m128h
898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)899 _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
900 			const int __D)
901 {
902   return __builtin_ia32_subsh_mask_round (__B, __C,
903 					  _mm_setzero_ph (),
904 					  __A, __D);
905 }
906 
907 extern __inline __m128h
908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sh(__m128h __A,__m128h __B,const int __C)909 _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
910 {
911   return __builtin_ia32_mulsh_mask_round (__A, __B,
912 					  _mm_setzero_ph (),
913 					  (__mmask8) -1, __C);
914 }
915 
916 extern __inline __m128h
917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)918 _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
919 		       __m128h __D, const int __E)
920 {
921   return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
922 }
923 
924 extern __inline __m128h
925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)926 _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
927 			const int __D)
928 {
929   return __builtin_ia32_mulsh_mask_round (__B, __C,
930 					  _mm_setzero_ph (),
931 					  __A, __D);
932 }
933 
934 extern __inline __m128h
935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sh(__m128h __A,__m128h __B,const int __C)936 _mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
937 {
938   return __builtin_ia32_divsh_mask_round (__A, __B,
939 					  _mm_setzero_ph (),
940 					  (__mmask8) -1, __C);
941 }
942 
943 extern __inline __m128h
944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)945 _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
946 		       __m128h __D, const int __E)
947 {
948   return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
949 }
950 
951 extern __inline __m128h
952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)953 _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
954 			const int __D)
955 {
956   return __builtin_ia32_divsh_mask_round (__B, __C,
957 					  _mm_setzero_ph (),
958 					  __A, __D);
959 }
960 #else
961 #define _mm_add_round_sh(A, B, C)					\
962   ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B),			\
963 					     _mm_setzero_ph (),		\
964 					     (__mmask8)-1, (C)))
965 
966 #define _mm_mask_add_round_sh(A, B, C, D, E)				\
967   ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
968 
969 #define _mm_maskz_add_round_sh(A, B, C, D)			\
970   ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C),		\
971 					     _mm_setzero_ph (),	\
972 					     (A), (D)))
973 
974 #define _mm_sub_round_sh(A, B, C)					\
975   ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B),			\
976 					     _mm_setzero_ph (),		\
977 					     (__mmask8)-1, (C)))
978 
979 #define _mm_mask_sub_round_sh(A, B, C, D, E)				\
980   ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
981 
982 #define _mm_maskz_sub_round_sh(A, B, C, D)			\
983   ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C),		\
984 					     _mm_setzero_ph (),	\
985 					     (A), (D)))
986 
987 #define _mm_mul_round_sh(A, B, C)					\
988   ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B),			\
989 					     _mm_setzero_ph (),		\
990 					     (__mmask8)-1, (C)))
991 
992 #define _mm_mask_mul_round_sh(A, B, C, D, E)				\
993   ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
994 
995 #define _mm_maskz_mul_round_sh(A, B, C, D)			\
996   ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C),		\
997 					     _mm_setzero_ph (),	\
998 					     (A), (D)))
999 
1000 #define _mm_div_round_sh(A, B, C)					\
1001   ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B),			\
1002 					     _mm_setzero_ph (),		\
1003 					     (__mmask8)-1, (C)))
1004 
1005 #define _mm_mask_div_round_sh(A, B, C, D, E)				\
1006   ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
1007 
1008 #define _mm_maskz_div_round_sh(A, B, C, D)			\
1009   ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C),		\
1010 					     _mm_setzero_ph (),	\
1011 					     (A), (D)))
1012 #endif /* __OPTIMIZE__ */
1013 
1014 /* Intrinsic vmaxph vminph.  */
1015 extern __inline __m512h
1016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_ph(__m512h __A,__m512h __B)1017 _mm512_max_ph (__m512h __A, __m512h __B)
1018 {
1019   return __builtin_ia32_maxph512_mask (__A, __B,
1020 				       _mm512_setzero_ph (),
1021 				       (__mmask32) -1);
1022 }
1023 
1024 extern __inline __m512h
1025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)1026 _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
1027 {
1028   return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
1029 }
1030 
1031 extern __inline __m512h
1032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_ph(__mmask32 __A,__m512h __B,__m512h __C)1033 _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
1034 {
1035   return __builtin_ia32_maxph512_mask (__B, __C,
1036 				       _mm512_setzero_ph (), __A);
1037 }
1038 
1039 extern __inline __m512h
1040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_ph(__m512h __A,__m512h __B)1041 _mm512_min_ph (__m512h __A, __m512h __B)
1042 {
1043   return __builtin_ia32_minph512_mask (__A, __B,
1044 				       _mm512_setzero_ph (),
1045 				       (__mmask32) -1);
1046 }
1047 
1048 extern __inline __m512h
1049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)1050 _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
1051 {
1052   return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
1053 }
1054 
1055 extern __inline __m512h
1056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_ph(__mmask32 __A,__m512h __B,__m512h __C)1057 _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
1058 {
1059   return __builtin_ia32_minph512_mask (__B, __C,
1060 				       _mm512_setzero_ph (), __A);
1061 }
1062 
1063 #ifdef __OPTIMIZE__
1064 extern __inline __m512h
1065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_ph(__m512h __A,__m512h __B,const int __C)1066 _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
1067 {
1068   return __builtin_ia32_maxph512_mask_round (__A, __B,
1069 					     _mm512_setzero_ph (),
1070 					     (__mmask32) -1, __C);
1071 }
1072 
1073 extern __inline __m512h
1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)1075 _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1076 			  __m512h __D, const int __E)
1077 {
1078   return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
1079 }
1080 
1081 extern __inline __m512h
1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)1083 _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
1084 			   const int __D)
1085 {
1086   return __builtin_ia32_maxph512_mask_round (__B, __C,
1087 					     _mm512_setzero_ph (),
1088 					     __A, __D);
1089 }
1090 
1091 extern __inline __m512h
1092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_ph(__m512h __A,__m512h __B,const int __C)1093 _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
1094 {
1095   return __builtin_ia32_minph512_mask_round (__A, __B,
1096 					     _mm512_setzero_ph (),
1097 					     (__mmask32) -1, __C);
1098 }
1099 
1100 extern __inline __m512h
1101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)1102 _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1103 			  __m512h __D, const int __E)
1104 {
1105   return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
1106 }
1107 
1108 extern __inline __m512h
1109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)1110 _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
1111 			   const int __D)
1112 {
1113   return __builtin_ia32_minph512_mask_round (__B, __C,
1114 					     _mm512_setzero_ph (),
1115 					     __A, __D);
1116 }
1117 
1118 #else
1119 #define _mm512_max_round_ph(A, B, C)				\
1120   (__builtin_ia32_maxph512_mask_round ((A), (B),		\
1121 				       _mm512_setzero_ph (),	\
1122 				       (__mmask32)-1, (C)))
1123 
1124 #define _mm512_mask_max_round_ph(A, B, C, D, E)				\
1125   (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
1126 
1127 #define _mm512_maskz_max_round_ph(A, B, C, D)			\
1128   (__builtin_ia32_maxph512_mask_round ((B), (C),		\
1129 				       _mm512_setzero_ph (),	\
1130 				       (A), (D)))
1131 
1132 #define _mm512_min_round_ph(A, B, C)				\
1133   (__builtin_ia32_minph512_mask_round ((A), (B),		\
1134 				       _mm512_setzero_ph (),	\
1135 				       (__mmask32)-1, (C)))
1136 
1137 #define _mm512_mask_min_round_ph(A, B, C, D, E)				\
1138   (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
1139 
1140 #define _mm512_maskz_min_round_ph(A, B, C, D)			\
1141   (__builtin_ia32_minph512_mask_round ((B), (C),		\
1142 				       _mm512_setzero_ph (),	\
1143 				       (A), (D)))
1144 #endif /* __OPTIMIZE__ */
1145 
1146 /* Intrinsic vmaxsh vminsh.  */
1147 extern __inline __m128h
1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_sh(__m128h __A,__m128h __B)1149 _mm_max_sh (__m128h __A, __m128h __B)
1150 {
1151   __A[0] = __A[0] > __B[0] ? __A[0] : __B[0];
1152   return __A;
1153 }
1154 
1155 extern __inline __m128h
1156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)1157 _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1158 {
1159   return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
1160 }
1161 
1162 extern __inline __m128h
1163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_sh(__mmask8 __A,__m128h __B,__m128h __C)1164 _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
1165 {
1166   return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
1167 				    __A);
1168 }
1169 
1170 extern __inline __m128h
1171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_sh(__m128h __A,__m128h __B)1172 _mm_min_sh (__m128h __A, __m128h __B)
1173 {
1174   __A[0] = __A[0] < __B[0] ? __A[0] : __B[0];
1175   return __A;
1176 }
1177 
1178 extern __inline __m128h
1179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)1180 _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1181 {
1182   return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
1183 }
1184 
1185 extern __inline __m128h
1186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_sh(__mmask8 __A,__m128h __B,__m128h __C)1187 _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
1188 {
1189   return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
1190 				    __A);
1191 }
1192 
1193 #ifdef __OPTIMIZE__
1194 extern __inline __m128h
1195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sh(__m128h __A,__m128h __B,const int __C)1196 _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
1197 {
1198   return __builtin_ia32_maxsh_mask_round (__A, __B,
1199 					  _mm_setzero_ph (),
1200 					  (__mmask8) -1, __C);
1201 }
1202 
1203 extern __inline __m128h
1204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)1205 _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1206 		       __m128h __D, const int __E)
1207 {
1208   return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
1209 }
1210 
1211 extern __inline __m128h
1212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)1213 _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1214 			const int __D)
1215 {
1216   return __builtin_ia32_maxsh_mask_round (__B, __C,
1217 					  _mm_setzero_ph (),
1218 					  __A, __D);
1219 }
1220 
1221 extern __inline __m128h
1222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_sh(__m128h __A,__m128h __B,const int __C)1223 _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
1224 {
1225   return __builtin_ia32_minsh_mask_round (__A, __B,
1226 					  _mm_setzero_ph (),
1227 					  (__mmask8) -1, __C);
1228 }
1229 
1230 extern __inline __m128h
1231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)1232 _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1233 		       __m128h __D, const int __E)
1234 {
1235   return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
1236 }
1237 
1238 extern __inline __m128h
1239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)1240 _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1241 			const int __D)
1242 {
1243   return __builtin_ia32_minsh_mask_round (__B, __C,
1244 					  _mm_setzero_ph (),
1245 					  __A, __D);
1246 }
1247 
1248 #else
1249 #define _mm_max_round_sh(A, B, C)			\
1250   (__builtin_ia32_maxsh_mask_round ((A), (B),		\
1251 				    _mm_setzero_ph (),	\
1252 				    (__mmask8)-1, (C)))
1253 
1254 #define _mm_mask_max_round_sh(A, B, C, D, E)			\
1255   (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
1256 
1257 #define _mm_maskz_max_round_sh(A, B, C, D)		\
1258   (__builtin_ia32_maxsh_mask_round ((B), (C),		\
1259 				    _mm_setzero_ph (),	\
1260 				    (A), (D)))
1261 
1262 #define _mm_min_round_sh(A, B, C)			\
1263   (__builtin_ia32_minsh_mask_round ((A), (B),		\
1264 				    _mm_setzero_ph (),	\
1265 				    (__mmask8)-1, (C)))
1266 
1267 #define _mm_mask_min_round_sh(A, B, C, D, E)			\
1268   (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
1269 
1270 #define _mm_maskz_min_round_sh(A, B, C, D)		\
1271   (__builtin_ia32_minsh_mask_round ((B), (C),		\
1272 				    _mm_setzero_ph (),	\
1273 				    (A), (D)))
1274 
1275 #endif /* __OPTIMIZE__ */
1276 
1277 /* vcmpph */
1278 #ifdef __OPTIMIZE
1279 extern __inline __mmask32
1280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_ph_mask(__m512h __A,__m512h __B,const int __C)1281 _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
1282 {
1283   return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
1284 						   (__mmask32) -1);
1285 }
1286 
1287 extern __inline __mmask32
1288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_ph_mask(__mmask32 __A,__m512h __B,__m512h __C,const int __D)1289 _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
1290 			 const int __D)
1291 {
1292   return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
1293 						   __A);
1294 }
1295 
1296 extern __inline __mmask32
1297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_round_ph_mask(__m512h __A,__m512h __B,const int __C,const int __D)1298 _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
1299 			  const int __D)
1300 {
1301   return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
1302 							 __C, (__mmask32) -1,
1303 							 __D);
1304 }
1305 
1306 extern __inline __mmask32
1307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_round_ph_mask(__mmask32 __A,__m512h __B,__m512h __C,const int __D,const int __E)1308 _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
1309 			       const int __D, const int __E)
1310 {
1311   return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
1312 							 __D, __A,
1313 							 __E);
1314 }
1315 
1316 #else
1317 #define _mm512_cmp_ph_mask(A, B, C)			\
1318   (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
1319 
1320 #define _mm512_mask_cmp_ph_mask(A, B, C, D)		\
1321   (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
1322 
1323 #define _mm512_cmp_round_ph_mask(A, B, C, D)				\
1324   (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
1325 
1326 #define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E)			\
1327   (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
1328 
1329 #endif /* __OPTIMIZE__ */
1330 
1331 /* Intrinsics vcmpsh.  */
1332 #ifdef __OPTIMIZE__
1333 extern __inline __mmask8
1334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sh_mask(__m128h __A,__m128h __B,const int __C)1335 _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
1336 {
1337   return (__mmask8)
1338     __builtin_ia32_cmpsh_mask_round (__A, __B,
1339 				     __C, (__mmask8) -1,
1340 				     _MM_FROUND_CUR_DIRECTION);
1341 }
1342 
1343 extern __inline __mmask8
1344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_sh_mask(__mmask8 __A,__m128h __B,__m128h __C,const int __D)1345 _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
1346 		      const int __D)
1347 {
1348   return (__mmask8)
1349     __builtin_ia32_cmpsh_mask_round (__B, __C,
1350 				     __D, __A,
1351 				     _MM_FROUND_CUR_DIRECTION);
1352 }
1353 
1354 extern __inline __mmask8
1355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_round_sh_mask(__m128h __A,__m128h __B,const int __C,const int __D)1356 _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
1357 		       const int __D)
1358 {
1359   return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
1360 						     __C, (__mmask8) -1,
1361 						     __D);
1362 }
1363 
1364 extern __inline __mmask8
1365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_round_sh_mask(__mmask8 __A,__m128h __B,__m128h __C,const int __D,const int __E)1366 _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
1367 			    const int __D, const int __E)
1368 {
1369   return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
1370 						     __D, __A,
1371 						     __E);
1372 }
1373 
1374 #else
1375 #define _mm_cmp_sh_mask(A, B, C)					\
1376   (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1),		\
1377 				    (_MM_FROUND_CUR_DIRECTION)))
1378 
1379 #define _mm_mask_cmp_sh_mask(A, B, C, D)				\
1380   (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A),			\
1381 				    (_MM_FROUND_CUR_DIRECTION)))
1382 
1383 #define _mm_cmp_round_sh_mask(A, B, C, D)			\
1384   (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
1385 
1386 #define _mm_mask_cmp_round_sh_mask(A, B, C, D, E)		\
1387   (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
1388 
1389 #endif /* __OPTIMIZE__ */
1390 
1391 /* Intrinsics vcomish.  */
1392 extern __inline int
1393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_sh(__m128h __A,__m128h __B)1394 _mm_comieq_sh (__m128h __A, __m128h __B)
1395 {
1396   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
1397 					  (__mmask8) -1,
1398 					  _MM_FROUND_CUR_DIRECTION);
1399 }
1400 
1401 extern __inline int
1402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comilt_sh(__m128h __A,__m128h __B)1403 _mm_comilt_sh (__m128h __A, __m128h __B)
1404 {
1405   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
1406 					  (__mmask8) -1,
1407 					  _MM_FROUND_CUR_DIRECTION);
1408 }
1409 
1410 extern __inline int
1411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comile_sh(__m128h __A,__m128h __B)1412 _mm_comile_sh (__m128h __A, __m128h __B)
1413 {
1414   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
1415 					  (__mmask8) -1,
1416 					  _MM_FROUND_CUR_DIRECTION);
1417 }
1418 
1419 extern __inline int
1420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comigt_sh(__m128h __A,__m128h __B)1421 _mm_comigt_sh (__m128h __A, __m128h __B)
1422 {
1423   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
1424 					  (__mmask8) -1,
1425 					  _MM_FROUND_CUR_DIRECTION);
1426 }
1427 
1428 extern __inline int
1429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1430 _mm_comige_sh (__m128h __A, __m128h __B)
1431 {
1432   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
1433 					  (__mmask8) -1,
1434 					  _MM_FROUND_CUR_DIRECTION);
1435 }
1436 
1437 extern __inline int
1438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1439 _mm_comineq_sh (__m128h __A, __m128h __B)
1440 {
1441   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
1442 					  (__mmask8) -1,
1443 					  _MM_FROUND_CUR_DIRECTION);
1444 }
1445 
1446 extern __inline int
1447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1448 _mm_ucomieq_sh (__m128h __A, __m128h __B)
1449 {
1450   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
1451 					  (__mmask8) -1,
1452 					  _MM_FROUND_CUR_DIRECTION);
1453 }
1454 
1455 extern __inline int
1456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1457 _mm_ucomilt_sh (__m128h __A, __m128h __B)
1458 {
1459   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
1460 					  (__mmask8) -1,
1461 					  _MM_FROUND_CUR_DIRECTION);
1462 }
1463 
1464 extern __inline int
1465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1466 _mm_ucomile_sh (__m128h __A, __m128h __B)
1467 {
1468   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
1469 					  (__mmask8) -1,
1470 					  _MM_FROUND_CUR_DIRECTION);
1471 }
1472 
1473 extern __inline int
1474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1475 _mm_ucomigt_sh (__m128h __A, __m128h __B)
1476 {
1477   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
1478 					  (__mmask8) -1,
1479 					  _MM_FROUND_CUR_DIRECTION);
1480 }
1481 
1482 extern __inline int
1483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1484 _mm_ucomige_sh (__m128h __A, __m128h __B)
1485 {
1486   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
1487 					  (__mmask8) -1,
1488 					  _MM_FROUND_CUR_DIRECTION);
1489 }
1490 
1491 extern __inline int
1492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493 _mm_ucomineq_sh (__m128h __A, __m128h __B)
1494 {
1495   return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
1496 					  (__mmask8) -1,
1497 					  _MM_FROUND_CUR_DIRECTION);
1498 }
1499 
1500 #ifdef __OPTIMIZE__
1501 extern __inline int
1502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1503 _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
1504 {
1505   return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
1506 					  (__mmask8) -1,
1507 					  _MM_FROUND_CUR_DIRECTION);
1508 }
1509 
1510 extern __inline int
1511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1512 _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
1513 {
1514   return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
1515 					  (__mmask8) -1, __R);
1516 }
1517 
1518 #else
1519 #define _mm_comi_round_sh(A, B, P, R)					\
1520   (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
1521 #define _mm_comi_sh(A, B, P)						\
1522   (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1),	\
1523 				    _MM_FROUND_CUR_DIRECTION))
1524 
1525 #endif /* __OPTIMIZE__  */
1526 
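/* Usage sketch (editorial illustration, not part of the original header):
   the comi/ucomi helpers return an int (0 or 1) rather than a mask; the
   plain comi forms use signaling (ordered) predicates, while the ucomi
   forms use the quiet variants.

     __m128h __x = _mm_set_sh ((_Float16) 3.0);
     __m128h __y = _mm_set_sh ((_Float16) 3.0);
     int __eq = _mm_comieq_sh (__x, __y);    // 1
     int __lt = _mm_ucomilt_sh (__x, __y);   // 0
   */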
1527 /* Intrinsics vsqrtph.  */
1528 extern __inline __m512h
1529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1530 _mm512_sqrt_ph (__m512h __A)
1531 {
1532   return __builtin_ia32_sqrtph512_mask_round (__A,
1533 					      _mm512_setzero_ph(),
1534 					      (__mmask32) -1,
1535 					      _MM_FROUND_CUR_DIRECTION);
1536 }
1537 
1538 extern __inline __m512h
1539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1540 _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
1541 {
1542   return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
1543 					      _MM_FROUND_CUR_DIRECTION);
1544 }
1545 
1546 extern __inline __m512h
1547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1548 _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
1549 {
1550   return __builtin_ia32_sqrtph512_mask_round (__B,
1551 					      _mm512_setzero_ph (),
1552 					      __A,
1553 					      _MM_FROUND_CUR_DIRECTION);
1554 }
1555 
1556 #ifdef __OPTIMIZE__
1557 extern __inline __m512h
1558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1559 _mm512_sqrt_round_ph (__m512h __A, const int __B)
1560 {
1561   return __builtin_ia32_sqrtph512_mask_round (__A,
1562 					      _mm512_setzero_ph(),
1563 					      (__mmask32) -1, __B);
1564 }
1565 
1566 extern __inline __m512h
1567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1568 _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1569 			   const int __D)
1570 {
1571   return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
1572 }
1573 
1574 extern __inline __m512h
1575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1576 _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
1577 {
1578   return __builtin_ia32_sqrtph512_mask_round (__B,
1579 					      _mm512_setzero_ph (),
1580 					      __A, __C);
1581 }
1582 
1583 #else
1584 #define _mm512_sqrt_round_ph(A, B)				\
1585   (__builtin_ia32_sqrtph512_mask_round ((A),			\
1586 					_mm512_setzero_ph (),	\
1587 					(__mmask32)-1, (B)))
1588 
1589 #define _mm512_mask_sqrt_round_ph(A, B, C, D)			\
1590   (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
1591 
1592 #define _mm512_maskz_sqrt_round_ph(A, B, C)			\
1593   (__builtin_ia32_sqrtph512_mask_round ((B),			\
1594 					_mm512_setzero_ph (),	\
1595 					(A), (C)))
1596 
1597 #endif /* __OPTIMIZE__ */
1598 
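/* Usage sketch (editorial illustration, not part of the original header):
   packed square root over all 32 half-precision lanes, with an optional
   merge mask and, in the _round_ form, an explicit rounding mode.

     __m512h __v = _mm512_set1_ph ((_Float16) 9.0);
     __m512h __r = _mm512_sqrt_ph (__v);                    // every lane 3.0
     __m512h __m = _mm512_mask_sqrt_ph (__v, 0xffff, __v);  // lanes 0-15 3.0,
							    // lanes 16-31 keep 9.0
   */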
1599 /* Intrinsics vrsqrtph.  */
1600 extern __inline __m512h
1601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1602 _mm512_rsqrt_ph (__m512h __A)
1603 {
1604   return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
1605 					 (__mmask32) -1);
1606 }
1607 
1608 extern __inline __m512h
1609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610 _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
1611 {
1612   return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
1613 }
1614 
1615 extern __inline __m512h
1616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1617 _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
1618 {
1619   return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
1620 					 __A);
1621 }
1622 
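/* Usage sketch (editorial illustration, not part of the original header):
   vrsqrtph is an approximate reciprocal square root (the result is not
   correctly rounded), so it suits code that refines the estimate or can
   tolerate the reduced accuracy.

     __m512h __v = _mm512_set1_ph ((_Float16) 4.0);
     __m512h __approx = _mm512_rsqrt_ph (__v);   // each lane close to 0.5
   */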
1623 /* Intrinsics vrsqrtsh.  */
1624 extern __inline __m128h
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 _mm_rsqrt_sh (__m128h __A, __m128h __B)
1627 {
1628   return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
1629 				      (__mmask8) -1);
1630 }
1631 
1632 extern __inline __m128h
1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634 _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1635 {
1636   return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
1637 }
1638 
1639 extern __inline __m128h
1640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641 _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
1642 {
1643   return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
1644 				      __A);
1645 }
1646 
1647 /* Intrinsics vsqrtsh.  */
1648 extern __inline __m128h
1649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1650 _mm_sqrt_sh (__m128h __A, __m128h __B)
1651 {
1652   return __builtin_ia32_sqrtsh_mask_round (__B, __A,
1653 					   _mm_setzero_ph (),
1654 					   (__mmask8) -1,
1655 					   _MM_FROUND_CUR_DIRECTION);
1656 }
1657 
1658 extern __inline __m128h
1659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660 _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1661 {
1662   return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
1663 					   _MM_FROUND_CUR_DIRECTION);
1664 }
1665 
1666 extern __inline __m128h
1667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1668 _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
1669 {
1670   return __builtin_ia32_sqrtsh_mask_round (__C, __B,
1671 					   _mm_setzero_ph (),
1672 					   __A, _MM_FROUND_CUR_DIRECTION);
1673 }
1674 
1675 #ifdef __OPTIMIZE__
1676 extern __inline __m128h
1677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678 _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
1679 {
1680   return __builtin_ia32_sqrtsh_mask_round (__B, __A,
1681 					   _mm_setzero_ph (),
1682 					   (__mmask8) -1, __C);
1683 }
1684 
1685 extern __inline __m128h
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1688 			__m128h __D, const int __E)
1689 {
1690   return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
1691 					   __E);
1692 }
1693 
1694 extern __inline __m128h
1695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1696 _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1697 			 const int __D)
1698 {
1699   return __builtin_ia32_sqrtsh_mask_round (__C, __B,
1700 					   _mm_setzero_ph (),
1701 					   __A, __D);
1702 }
1703 
1704 #else
1705 #define _mm_sqrt_round_sh(A, B, C)				\
1706   (__builtin_ia32_sqrtsh_mask_round ((B), (A),			\
1707 				     _mm_setzero_ph (),		\
1708 				     (__mmask8)-1, (C)))
1709 
1710 #define _mm_mask_sqrt_round_sh(A, B, C, D, E)			\
1711   (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
1712 
1713 #define _mm_maskz_sqrt_round_sh(A, B, C, D)		\
1714   (__builtin_ia32_sqrtsh_mask_round ((C), (B),		\
1715 				     _mm_setzero_ph (),	\
1716 				     (A), (D)))
1717 
1718 #endif /* __OPTIMIZE__ */
1719 
1720 /* Intrinsics vrcpph.  */
1721 extern __inline __m512h
1722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1723 _mm512_rcp_ph (__m512h __A)
1724 {
1725   return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
1726 				       (__mmask32) -1);
1727 }
1728 
1729 extern __inline __m512h
1730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1731 _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
1732 {
1733   return __builtin_ia32_rcpph512_mask (__C, __A, __B);
1734 }
1735 
1736 extern __inline __m512h
1737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1738 _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
1739 {
1740   return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
1741 				       __A);
1742 }
1743 
1744 /* Intrinsics vrcpsh.  */
1745 extern __inline __m128h
1746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1747 _mm_rcp_sh (__m128h __A, __m128h __B)
1748 {
1749   return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
1750 				    (__mmask8) -1);
1751 }
1752 
1753 extern __inline __m128h
1754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1755 _mm_mask_rcp_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1756 {
1757   return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
1758 }
1759 
1760 extern __inline __m128h
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 _mm_maskz_rcp_sh (__mmask8 __A, __m128h __B, __m128h __C)
1763 {
1764   return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
1765 				    __A);
1766 }
1767 
1768 /* Intrinsics vscalefph.  */
1769 extern __inline __m512h
1770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1771 _mm512_scalef_ph (__m512h __A, __m512h __B)
1772 {
1773   return __builtin_ia32_scalefph512_mask_round (__A, __B,
1774 						_mm512_setzero_ph (),
1775 						(__mmask32) -1,
1776 						_MM_FROUND_CUR_DIRECTION);
1777 }
1778 
1779 extern __inline __m512h
1780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1781 _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
1782 {
1783   return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
1784 						_MM_FROUND_CUR_DIRECTION);
1785 }
1786 
1787 extern __inline __m512h
1788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1789 _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
1790 {
1791   return __builtin_ia32_scalefph512_mask_round (__B, __C,
1792 						_mm512_setzero_ph (),
1793 						__A,
1794 						_MM_FROUND_CUR_DIRECTION);
1795 }
1796 
1797 #ifdef __OPTIMIZE__
1798 extern __inline __m512h
1799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1800 _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
1801 {
1802   return __builtin_ia32_scalefph512_mask_round (__A, __B,
1803 						_mm512_setzero_ph (),
1804 						(__mmask32) -1, __C);
1805 }
1806 
1807 extern __inline __m512h
1808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1809 _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1810 			     __m512h __D, const int __E)
1811 {
1812   return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
1813 						__E);
1814 }
1815 
1816 extern __inline __m512h
1817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818 _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
1819 			      const int __D)
1820 {
1821   return __builtin_ia32_scalefph512_mask_round (__B, __C,
1822 						_mm512_setzero_ph (),
1823 						__A, __D);
1824 }
1825 
1826 #else
1827 #define _mm512_scalef_round_ph(A, B, C)				\
1828   (__builtin_ia32_scalefph512_mask_round ((A), (B),		\
1829 					  _mm512_setzero_ph (),	\
1830 					  (__mmask32)-1, (C)))
1831 
1832 #define _mm512_mask_scalef_round_ph(A, B, C, D, E)			\
1833   (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
1834 
1835 #define _mm512_maskz_scalef_round_ph(A, B, C, D)		\
1836   (__builtin_ia32_scalefph512_mask_round ((B), (C),		\
1837 					  _mm512_setzero_ph (),	\
1838 					  (A), (D)))
1839 
1840 #endif  /* __OPTIMIZE__ */
1841 
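/* Usage sketch (editorial illustration, not part of the original header):
   vscalefph computes __A * 2^floor(__B) per lane, which allows exponent
   manipulation without leaving the floating-point domain.

     __m512h __x = _mm512_set1_ph ((_Float16) 1.5);
     __m512h __e = _mm512_set1_ph ((_Float16) 3.0);
     __m512h __y = _mm512_scalef_ph (__x, __e);   // each lane 1.5 * 2^3 = 12.0
   */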
1842 /* Intrinsics vscalefsh.  */
1843 extern __inline __m128h
1844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1845 _mm_scalef_sh (__m128h __A, __m128h __B)
1846 {
1847   return __builtin_ia32_scalefsh_mask_round (__A, __B,
1848 					     _mm_setzero_ph (),
1849 					     (__mmask8) -1,
1850 					     _MM_FROUND_CUR_DIRECTION);
1851 }
1852 
1853 extern __inline __m128h
1854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1855 _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1856 {
1857   return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
1858 					     _MM_FROUND_CUR_DIRECTION);
1859 }
1860 
1861 extern __inline __m128h
1862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1863 _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
1864 {
1865   return __builtin_ia32_scalefsh_mask_round (__B, __C,
1866 					     _mm_setzero_ph (),
1867 					     __A,
1868 					     _MM_FROUND_CUR_DIRECTION);
1869 }
1870 
1871 #ifdef __OPTIMIZE__
1872 extern __inline __m128h
1873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1874 _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
1875 {
1876   return __builtin_ia32_scalefsh_mask_round (__A, __B,
1877 					     _mm_setzero_ph (),
1878 					     (__mmask8) -1, __C);
1879 }
1880 
1881 extern __inline __m128h
1882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1883 _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1884 			  __m128h __D, const int __E)
1885 {
1886   return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
1887 					     __E);
1888 }
1889 
1890 extern __inline __m128h
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892 _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1893 			   const int __D)
1894 {
1895   return __builtin_ia32_scalefsh_mask_round (__B, __C,
1896 					     _mm_setzero_ph (),
1897 					     __A, __D);
1898 }
1899 
1900 #else
1901 #define _mm_scalef_round_sh(A, B, C)				\
1902   (__builtin_ia32_scalefsh_mask_round ((A), (B),		\
1903 				       _mm_setzero_ph (),	\
1904 				       (__mmask8)-1, (C)))
1905 
1906 #define _mm_mask_scalef_round_sh(A, B, C, D, E)				\
1907   (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
1908 
1909 #define _mm_maskz_scalef_round_sh(A, B, C, D)				\
1910   (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (),	\
1911 				       (A), (D)))
1912 
1913 #endif /* __OPTIMIZE__ */
1914 
1915 /* Intrinsics vreduceph.  */
1916 #ifdef __OPTIMIZE__
1917 extern __inline __m512h
1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919 _mm512_reduce_ph (__m512h __A, int __B)
1920 {
1921   return __builtin_ia32_reduceph512_mask_round (__A, __B,
1922 						_mm512_setzero_ph (),
1923 						(__mmask32) -1,
1924 						_MM_FROUND_CUR_DIRECTION);
1925 }
1926 
1927 extern __inline __m512h
1928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1929 _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
1930 {
1931   return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
1932 						_MM_FROUND_CUR_DIRECTION);
1933 }
1934 
1935 extern __inline __m512h
1936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1937 _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
1938 {
1939   return __builtin_ia32_reduceph512_mask_round (__B, __C,
1940 						_mm512_setzero_ph (),
1941 						__A,
1942 						_MM_FROUND_CUR_DIRECTION);
1943 }
1944 
1945 extern __inline __m512h
1946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1947 _mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
1948 {
1949   return __builtin_ia32_reduceph512_mask_round (__A, __B,
1950 						_mm512_setzero_ph (),
1951 						(__mmask32) -1, __C);
1952 }
1953 
1954 extern __inline __m512h
1955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1956 _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1957 			     int __D, const int __E)
1958 {
1959   return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
1960 						__E);
1961 }
1962 
1963 extern __inline __m512h
1964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1965 _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
1966 			      const int __D)
1967 {
1968   return __builtin_ia32_reduceph512_mask_round (__B, __C,
1969 						_mm512_setzero_ph (),
1970 						__A, __D);
1971 }
1972 
1973 #else
1974 #define _mm512_reduce_ph(A, B)						\
1975   (__builtin_ia32_reduceph512_mask_round ((A), (B),			\
1976 					  _mm512_setzero_ph (),		\
1977 					  (__mmask32)-1,		\
1978 					  _MM_FROUND_CUR_DIRECTION))
1979 
1980 #define _mm512_mask_reduce_ph(A, B, C, D)				\
1981   (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B),		\
1982 					  _MM_FROUND_CUR_DIRECTION))
1983 
1984 #define _mm512_maskz_reduce_ph(A, B, C)					\
1985   (__builtin_ia32_reduceph512_mask_round ((B), (C),			\
1986 					  _mm512_setzero_ph (),		\
1987 					  (A), _MM_FROUND_CUR_DIRECTION))
1988 
1989 #define _mm512_reduce_round_ph(A, B, C)				\
1990   (__builtin_ia32_reduceph512_mask_round ((A), (B),		\
1991 					  _mm512_setzero_ph (),	\
1992 					  (__mmask32)-1, (C)))
1993 
1994 #define _mm512_mask_reduce_round_ph(A, B, C, D, E)			\
1995   (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
1996 
1997 #define _mm512_maskz_reduce_round_ph(A, B, C, D)		\
1998   (__builtin_ia32_reduceph512_mask_round ((B), (C),		\
1999 					  _mm512_setzero_ph (),	\
2000 					  (A), (D)))
2001 
2002 #endif /* __OPTIMIZE__ */
2003 
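/* Usage sketch (editorial illustration, not part of the original header):
   vreduceph returns the reduced argument x - round(x), where the rounding
   granularity (number of fraction bits kept) and the rounding mode are
   encoded in the immediate; with an immediate of 0 each lane becomes its
   signed distance to the nearest integer.

     __m512h __x = _mm512_set1_ph ((_Float16) 2.75);
     __m512h __frac = _mm512_reduce_ph (__x, 0);   // each lane -0.25
   */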
2004 /* Intrinsics vreducesh.  */
2005 #ifdef __OPTIMIZE__
2006 extern __inline __m128h
2007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2008 _mm_reduce_sh (__m128h __A, __m128h __B, int __C)
2009 {
2010   return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
2011 					     _mm_setzero_ph (),
2012 					     (__mmask8) -1,
2013 					     _MM_FROUND_CUR_DIRECTION);
2014 }
2015 
2016 extern __inline __m128h
2017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2018 _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
2019 		    __m128h __D, int __E)
2020 {
2021   return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
2022 					     _MM_FROUND_CUR_DIRECTION);
2023 }
2024 
2025 extern __inline __m128h
2026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2027 _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
2028 {
2029   return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
2030 					     _mm_setzero_ph (), __A,
2031 					     _MM_FROUND_CUR_DIRECTION);
2032 }
2033 
2034 extern __inline __m128h
2035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2036 _mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
2037 {
2038   return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
2039 					     _mm_setzero_ph (),
2040 					     (__mmask8) -1, __D);
2041 }
2042 
2043 extern __inline __m128h
2044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2045 _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
2046 			  __m128h __D, int __E, const int __F)
2047 {
2048   return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
2049 					     __B, __F);
2050 }
2051 
2052 extern __inline __m128h
2053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2054 _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
2055 			   int __D, const int __E)
2056 {
2057   return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
2058 					     _mm_setzero_ph (),
2059 					     __A, __E);
2060 }
2061 
2062 #else
2063 #define _mm_reduce_sh(A, B, C)						\
2064   (__builtin_ia32_reducesh_mask_round ((A), (B), (C),			\
2065 				       _mm_setzero_ph (),		\
2066 				       (__mmask8)-1,			\
2067 				       _MM_FROUND_CUR_DIRECTION))
2068 
2069 #define _mm_mask_reduce_sh(A, B, C, D, E)				\
2070   (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B),		\
2071 				       _MM_FROUND_CUR_DIRECTION))
2072 
2073 #define _mm_maskz_reduce_sh(A, B, C, D)					\
2074   (__builtin_ia32_reducesh_mask_round ((B), (C), (D),			\
2075 				       _mm_setzero_ph (),		\
2076 				       (A), _MM_FROUND_CUR_DIRECTION))
2077 
2078 #define _mm_reduce_round_sh(A, B, C, D)				\
2079   (__builtin_ia32_reducesh_mask_round ((A), (B), (C),		\
2080 				       _mm_setzero_ph (),	\
2081 				       (__mmask8)-1, (D)))
2082 
2083 #define _mm_mask_reduce_round_sh(A, B, C, D, E, F)			\
2084   (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
2085 
2086 #define _mm_maskz_reduce_round_sh(A, B, C, D, E)		\
2087   (__builtin_ia32_reducesh_mask_round ((B), (C), (D),		\
2088 				       _mm_setzero_ph (),	\
2089 				       (A), (E)))
2090 
2091 #endif /* __OPTIMIZE__ */
2092 
2093 /* Intrinsics vrndscaleph.  */
2094 #ifdef __OPTIMIZE__
2095 extern __inline __m512h
2096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2097 _mm512_roundscale_ph (__m512h __A, int __B)
2098 {
2099   return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
2100 						  _mm512_setzero_ph (),
2101 						  (__mmask32) -1,
2102 						  _MM_FROUND_CUR_DIRECTION);
2103 }
2104 
2105 extern __inline __m512h
2106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2107 _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
2108 			   __m512h __C, int __D)
2109 {
2110   return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
2111 						  _MM_FROUND_CUR_DIRECTION);
2112 }
2113 
2114 extern __inline __m512h
2115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2116 _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
2117 {
2118   return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
2119 						  _mm512_setzero_ph (),
2120 						  __A,
2121 						  _MM_FROUND_CUR_DIRECTION);
2122 }
2123 
2124 extern __inline __m512h
2125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2126 _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
2127 {
2128   return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
2129 						  _mm512_setzero_ph (),
2130 						  (__mmask32) -1,
2131 						  __C);
2132 }
2133 
2134 extern __inline __m512h
2135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2136 _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
2137 				 __m512h __C, int __D, const int __E)
2138 {
2139   return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
2140 						  __B, __E);
2141 }
2142 
2143 extern __inline __m512h
2144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2145 _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
2146 				  const int __D)
2147 {
2148   return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
2149 						  _mm512_setzero_ph (),
2150 						  __A, __D);
2151 }
2152 
2153 #else
2154 #define _mm512_roundscale_ph(A, B)					\
2155   (__builtin_ia32_rndscaleph512_mask_round ((A), (B),			\
2156 					    _mm512_setzero_ph (),	\
2157 					    (__mmask32)-1,		\
2158 					    _MM_FROUND_CUR_DIRECTION))
2159 
2160 #define _mm512_mask_roundscale_ph(A, B, C, D)				\
2161   (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B),		\
2162 					    _MM_FROUND_CUR_DIRECTION))
2163 
2164 #define _mm512_maskz_roundscale_ph(A, B, C)				\
2165   (__builtin_ia32_rndscaleph512_mask_round ((B), (C),			\
2166 					    _mm512_setzero_ph (),	\
2167 					    (A),			\
2168 					    _MM_FROUND_CUR_DIRECTION))
2169 #define _mm512_roundscale_round_ph(A, B, C)				\
2170   (__builtin_ia32_rndscaleph512_mask_round ((A), (B),			\
2171 					    _mm512_setzero_ph (),	\
2172 					    (__mmask32)-1, (C)))
2173 
2174 #define _mm512_mask_roundscale_round_ph(A, B, C, D, E)			\
2175   (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
2176 
2177 #define _mm512_maskz_roundscale_round_ph(A, B, C, D)			\
2178   (__builtin_ia32_rndscaleph512_mask_round ((B), (C),			\
2179 					    _mm512_setzero_ph (),	\
2180 					    (A), (D)))
2181 
2182 #endif /* __OPTIMIZE__ */
2183 
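/* Usage sketch (editorial illustration, not part of the original header):
   vrndscaleph rounds each lane to 2^-M precision; the low four immediate
   bits select the rounding mode and the high four bits give M, so an
   immediate of 0 rounds to the nearest integral value (ties to even).

     __m512h __x = _mm512_set1_ph ((_Float16) 2.5);
     __m512h __r = _mm512_roundscale_ph (__x, 0);   // each lane 2.0
   */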
2184 /* Intrinsics vrndscalesh.  */
2185 #ifdef __OPTIMIZE__
2186 extern __inline __m128h
2187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2188 _mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
2189 {
2190   return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
2191 					       _mm_setzero_ph (),
2192 					       (__mmask8) -1,
2193 					       _MM_FROUND_CUR_DIRECTION);
2194 }
2195 
2196 extern __inline __m128h
2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2198 _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
2199 			__m128h __D, int __E)
2200 {
2201   return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
2202 					       _MM_FROUND_CUR_DIRECTION);
2203 }
2204 
2205 extern __inline __m128h
2206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2207 _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
2208 {
2209   return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
2210 					       _mm_setzero_ph (), __A,
2211 					       _MM_FROUND_CUR_DIRECTION);
2212 }
2213 
2214 extern __inline __m128h
2215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2216 _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
2217 {
2218   return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
2219 					       _mm_setzero_ph (),
2220 					       (__mmask8) -1,
2221 					       __D);
2222 }
2223 
2224 extern __inline __m128h
2225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2226 _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
2227 			      __m128h __D, int __E, const int __F)
2228 {
2229   return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
2230 					       __A, __B, __F);
2231 }
2232 
2233 extern __inline __m128h
2234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235 _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
2236 			       int __D, const int __E)
2237 {
2238   return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
2239 					       _mm_setzero_ph (),
2240 					       __A, __E);
2241 }
2242 
2243 #else
2244 #define _mm_roundscale_sh(A, B, C)					\
2245   (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),			\
2246 					 _mm_setzero_ph (),		\
2247 					 (__mmask8)-1,			\
2248 					 _MM_FROUND_CUR_DIRECTION))
2249 
2250 #define _mm_mask_roundscale_sh(A, B, C, D, E)				\
2251   (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B),	\
2252 					 _MM_FROUND_CUR_DIRECTION))
2253 
2254 #define _mm_maskz_roundscale_sh(A, B, C, D)				\
2255   (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),			\
2256 					 _mm_setzero_ph (),		\
2257 					 (A), _MM_FROUND_CUR_DIRECTION))
2258 
2259 #define _mm_roundscale_round_sh(A, B, C, D)			\
2260   (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),		\
2261 					 _mm_setzero_ph (),	\
2262 					 (__mmask8)-1, (D)))
2263 
2264 #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F)			\
2265   (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
2266 
2267 #define _mm_maskz_roundscale_round_sh(A, B, C, D, E)		\
2268   (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),		\
2269 					 _mm_setzero_ph (),	\
2270 					 (A), (E)))
2271 
2272 #endif /* __OPTIMIZE__ */
2273 
2274 /* Intrinsics vfpclasssh.  */
2275 #ifdef __OPTIMIZE__
2276 extern __inline __mmask8
2277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2278 _mm_fpclass_sh_mask (__m128h __A, const int __imm)
2279 {
2280   return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
2281 						   (__mmask8) -1);
2282 }
2283 
2284 extern __inline __mmask8
2285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2286 _mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
2287 {
2288   return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U);
2289 }
2290 
2291 #else
2292 #define _mm_fpclass_sh_mask(X, C)					\
2293   ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X),	\
2294 					     (int) (C), (__mmask8) (-1)))
2295 
2296 #define _mm_mask_fpclass_sh_mask(U, X, C)				\
2297   ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X),	\
2298 					     (int) (C), (__mmask8) (U)))
2299 #endif /* __OPTIMIZE__ */
2300 
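/* Usage sketch (editorial illustration, not part of the original header):
   vfpclasssh tests element 0 against the categories selected by the
   immediate; assuming the usual vfpclass category encoding (bit 0 QNaN,
   bit 3 +Inf, bit 4 -Inf, bit 7 SNaN), 0x18 checks for either infinity.

     __m128h __x = _mm_set_sh ((_Float16) __builtin_inff ());
     __mmask8 __isinf = _mm_fpclass_sh_mask (__x, 0x18);   // bit 0 set
   */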
2301 /* Intrinsics vfpclassph.  */
2302 #ifdef __OPTIMIZE__
2303 extern __inline __mmask32
2304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2305 _mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
2306 			     const int __imm)
2307 {
2308   return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
2309 						       __imm, __U);
2310 }
2311 
2312 extern __inline __mmask32
2313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2314 _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
2315 {
2316   return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
2317 						       __imm,
2318 						       (__mmask32) -1);
2319 }
2320 
2321 #else
2322 #define _mm512_mask_fpclass_ph_mask(u, x, c)				\
2323   ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
2324 						 (int) (c), (__mmask32) (u)))
2325 
2326 #define _mm512_fpclass_ph_mask(x, c)                                    \
2327   ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
2328 						 (int) (c), (__mmask32) -1))
2329 #endif /* __OPTIMIZE__ */
2330 
2331 /* Intrinsics vgetexpph, vgetexpsh.  */
2332 extern __inline __m128h
2333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2334 _mm_getexp_sh (__m128h __A, __m128h __B)
2335 {
2336   return (__m128h)
2337     __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2338 					(__v8hf) _mm_setzero_ph (),
2339 					(__mmask8) -1,
2340 					_MM_FROUND_CUR_DIRECTION);
2341 }
2342 
2343 extern __inline __m128h
2344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345 _mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
2346 {
2347   return (__m128h)
2348     __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2349 					(__v8hf) __W, (__mmask8) __U,
2350 					_MM_FROUND_CUR_DIRECTION);
2351 }
2352 
2353 extern __inline __m128h
2354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2355 _mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B)
2356 {
2357   return (__m128h)
2358     __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2359 					(__v8hf) _mm_setzero_ph (),
2360 					(__mmask8) __U,
2361 					_MM_FROUND_CUR_DIRECTION);
2362 }
2363 
2364 extern __inline __m512h
2365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2366 _mm512_getexp_ph (__m512h __A)
2367 {
2368   return (__m512h)
2369     __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2370 				     (__v32hf) _mm512_setzero_ph (),
2371 				     (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
2372 }
2373 
2374 extern __inline __m512h
2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2376 _mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
2377 {
2378   return (__m512h)
2379     __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
2380 				     (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
2381 }
2382 
2383 extern __inline __m512h
2384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2385 _mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
2386 {
2387   return (__m512h)
2388     __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2389 				     (__v32hf) _mm512_setzero_ph (),
2390 				     (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
2391 }
2392 
2393 #ifdef __OPTIMIZE__
2394 extern __inline __m128h
2395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2396 _mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R)
2397 {
2398   return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
2399 						       (__v8hf) __B,
2400 						       _mm_setzero_ph (),
2401 						       (__mmask8) -1,
2402 						       __R);
2403 }
2404 
2405 extern __inline __m128h
2406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2407 _mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
2408 			  __m128h __B, const int __R)
2409 {
2410   return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
2411 						       (__v8hf) __B,
2412 						       (__v8hf) __W,
2413 						       (__mmask8) __U, __R);
2414 }
2415 
2416 extern __inline __m128h
2417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2418 _mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
2419 			   const int __R)
2420 {
2421   return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
2422 						       (__v8hf) __B,
2423 						       (__v8hf)
2424 						       _mm_setzero_ph (),
2425 						       (__mmask8) __U, __R);
2426 }
2427 
2428 extern __inline __m512h
2429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2430 _mm512_getexp_round_ph (__m512h __A, const int __R)
2431 {
2432   return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2433 						    (__v32hf)
2434 						    _mm512_setzero_ph (),
2435 						    (__mmask32) -1, __R);
2436 }
2437 
2438 extern __inline __m512h
2439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2440 _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
2441 			     const int __R)
2442 {
2443   return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2444 						    (__v32hf) __W,
2445 						    (__mmask32) __U, __R);
2446 }
2447 
2448 extern __inline __m512h
2449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2450 _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
2451 {
2452   return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2453 						    (__v32hf)
2454 						    _mm512_setzero_ph (),
2455 						    (__mmask32) __U, __R);
2456 }
2457 
2458 #else
2459 #define _mm_getexp_round_sh(A, B, R)					\
2460   ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A),	\
2461 					       (__v8hf)(__m128h)(B),	\
2462 					       (__v8hf)_mm_setzero_ph(), \
2463 					       (__mmask8)-1, R))
2464 
2465 #define _mm_mask_getexp_round_sh(W, U, A, B, C)			\
2466   (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
2467 
2468 #define _mm_maskz_getexp_round_sh(U, A, B, C)				\
2469   (__m128h)__builtin_ia32_getexpsh_mask_round(A, B,			\
2470 					      (__v8hf)_mm_setzero_ph(),	\
2471 					      U, C)
2472 
2473 #define _mm512_getexp_round_ph(A, R)					\
2474   ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),	\
2475 					    (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
2476 
2477 #define _mm512_mask_getexp_round_ph(W, U, A, R)				\
2478   ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),	\
2479 					    (__v32hf)(__m512h)(W), (__mmask32)(U), R))
2480 
2481 #define _mm512_maskz_getexp_round_ph(U, A, R)				\
2482   ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),	\
2483 					    (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
2484 
2485 #endif /* __OPTIMIZE__ */
2486 
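/* Usage sketch (editorial illustration, not part of the original header):
   vgetexpph/vgetexpsh return floor(log2(|x|)) of each source element as a
   half-precision value, i.e. the unbiased exponent.

     __m128h __a = _mm_set_sh ((_Float16) 8.0);
     __m128h __e = _mm_getexp_sh (__a, __a);   // element 0 becomes 3.0
   */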
2487 /* Intrinsics vgetmantph, vgetmantsh.  */
2488 #ifdef __OPTIMIZE__
2489 extern __inline __m128h
2490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2491 _mm_getmant_sh (__m128h __A, __m128h __B,
2492 		_MM_MANTISSA_NORM_ENUM __C,
2493 		_MM_MANTISSA_SIGN_ENUM __D)
2494 {
2495   return (__m128h)
2496     __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2497 					 (__D << 2) | __C, _mm_setzero_ph (),
2498 					 (__mmask8) -1,
2499 					 _MM_FROUND_CUR_DIRECTION);
2500 }
2501 
2502 extern __inline __m128h
2503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2504 _mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A,
2505 		     __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
2506 		     _MM_MANTISSA_SIGN_ENUM __D)
2507 {
2508   return (__m128h)
2509     __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2510 					 (__D << 2) | __C, (__v8hf) __W,
2511 					 __U, _MM_FROUND_CUR_DIRECTION);
2512 }
2513 
2514 extern __inline __m128h
2515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2516 _mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
2517 		      _MM_MANTISSA_NORM_ENUM __C,
2518 		      _MM_MANTISSA_SIGN_ENUM __D)
2519 {
2520   return (__m128h)
2521     __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2522 					 (__D << 2) | __C,
2523 					 (__v8hf) _mm_setzero_ph(),
2524 					 __U, _MM_FROUND_CUR_DIRECTION);
2525 }
2526 
2527 extern __inline __m512h
2528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2529 _mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
2530 		   _MM_MANTISSA_SIGN_ENUM __C)
2531 {
2532   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2533 						     (__C << 2) | __B,
2534 						     _mm512_setzero_ph (),
2535 						     (__mmask32) -1,
2536 						     _MM_FROUND_CUR_DIRECTION);
2537 }
2538 
2539 extern __inline __m512h
2540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2541 _mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
2542 			_MM_MANTISSA_NORM_ENUM __B,
2543 			_MM_MANTISSA_SIGN_ENUM __C)
2544 {
2545   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2546 						     (__C << 2) | __B,
2547 						     (__v32hf) __W, __U,
2548 						     _MM_FROUND_CUR_DIRECTION);
2549 }
2550 
2551 extern __inline __m512h
2552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2553 _mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
2554 			 _MM_MANTISSA_NORM_ENUM __B,
2555 			 _MM_MANTISSA_SIGN_ENUM __C)
2556 {
2557   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2558 						     (__C << 2) | __B,
2559 						     (__v32hf)
2560 						     _mm512_setzero_ph (),
2561 						     __U,
2562 						     _MM_FROUND_CUR_DIRECTION);
2563 }
2564 
2565 extern __inline __m128h
2566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2567 _mm_getmant_round_sh (__m128h __A, __m128h __B,
2568 		      _MM_MANTISSA_NORM_ENUM __C,
2569 		      _MM_MANTISSA_SIGN_ENUM __D, const int __R)
2570 {
2571   return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
2572 							(__v8hf) __B,
2573 							(__D << 2) | __C,
2574 							_mm_setzero_ph (),
2575 							(__mmask8) -1,
2576 							__R);
2577 }
2578 
2579 extern __inline __m128h
2580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2581 _mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
2582 			   __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
2583 			   _MM_MANTISSA_SIGN_ENUM __D, const int __R)
2584 {
2585   return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
2586 							(__v8hf) __B,
2587 							(__D << 2) | __C,
2588 							(__v8hf) __W,
2589 							__U, __R);
2590 }
2591 
2592 extern __inline __m128h
2593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2594 _mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
2595 			    _MM_MANTISSA_NORM_ENUM __C,
2596 			    _MM_MANTISSA_SIGN_ENUM __D, const int __R)
2597 {
2598   return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
2599 							(__v8hf) __B,
2600 							(__D << 2) | __C,
2601 							(__v8hf)
2602 							_mm_setzero_ph(),
2603 							__U, __R);
2604 }
2605 
2606 extern __inline __m512h
2607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608 _mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
2609 			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
2610 {
2611   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2612 						     (__C << 2) | __B,
2613 						     _mm512_setzero_ph (),
2614 						     (__mmask32) -1, __R);
2615 }
2616 
2617 extern __inline __m512h
2618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2619 _mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
2620 			      _MM_MANTISSA_NORM_ENUM __B,
2621 			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
2622 {
2623   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2624 						     (__C << 2) | __B,
2625 						     (__v32hf) __W, __U,
2626 						     __R);
2627 }
2628 
2629 extern __inline __m512h
2630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2631 _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
2632 			       _MM_MANTISSA_NORM_ENUM __B,
2633 			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
2634 {
2635   return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2636 						     (__C << 2) | __B,
2637 						     (__v32hf)
2638 						     _mm512_setzero_ph (),
2639 						     __U, __R);
2640 }
2641 
2642 #else
2643 #define _mm512_getmant_ph(X, B, C)					\
2644   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
2645 					      (int)(((C)<<2) | (B)),	\
2646 					      (__v32hf)(__m512h)	\
2647 					      _mm512_setzero_ph(),	\
2648 					      (__mmask32)-1,		\
2649 					      _MM_FROUND_CUR_DIRECTION))
2650 
2651 #define _mm512_mask_getmant_ph(W, U, X, B, C)				\
2652   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
2653 					      (int)(((C)<<2) | (B)),	\
2654 					      (__v32hf)(__m512h)(W),	\
2655 					      (__mmask32)(U),		\
2656 					      _MM_FROUND_CUR_DIRECTION))
2657 
2658 
2659 #define _mm512_maskz_getmant_ph(U, X, B, C)				\
2660   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
2661 					      (int)(((C)<<2) | (B)),	\
2662 					      (__v32hf)(__m512h)	\
2663 					      _mm512_setzero_ph(),	\
2664 					      (__mmask32)(U),		\
2665 					      _MM_FROUND_CUR_DIRECTION))
2666 
2667 #define _mm_getmant_sh(X, Y, C, D)					\
2668   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
2669 						 (__v8hf)(__m128h)(Y),	\
2670 						 (int)(((D)<<2) | (C)),	\
2671 						 (__v8hf)(__m128h)	\
2672 						 _mm_setzero_ph (),	\
2673 						 (__mmask8)-1,		\
2674 						 _MM_FROUND_CUR_DIRECTION))
2675 
2676 #define _mm_mask_getmant_sh(W, U, X, Y, C, D)				\
2677   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
2678 						 (__v8hf)(__m128h)(Y),	\
2679 						 (int)(((D)<<2) | (C)),	\
2680 						 (__v8hf)(__m128h)(W),	\
2681 						 (__mmask8)(U),		\
2682 						 _MM_FROUND_CUR_DIRECTION))
2683 
2684 #define _mm_maskz_getmant_sh(U, X, Y, C, D)				\
2685   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
2686 						 (__v8hf)(__m128h)(Y),	\
2687 						 (int)(((D)<<2) | (C)),	\
2688 						 (__v8hf)(__m128h)	\
2689 						 _mm_setzero_ph(),	\
2690 						 (__mmask8)(U),		\
2691 						 _MM_FROUND_CUR_DIRECTION))
2692 
2693 #define _mm512_getmant_round_ph(X, B, C, R)				\
2694   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
2695 					      (int)(((C)<<2) | (B)),	\
2696 					      (__v32hf)(__m512h)	\
2697 					      _mm512_setzero_ph(),	\
2698 					      (__mmask32)-1,		\
2699 					      (R)))
2700 
2701 #define _mm512_mask_getmant_round_ph(W, U, X, B, C, R)			\
2702   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
2703 					      (int)(((C)<<2) | (B)),	\
2704 					      (__v32hf)(__m512h)(W),	\
2705 					      (__mmask32)(U),		\
2706 					      (R)))
2707 
2708 
2709 #define _mm512_maskz_getmant_round_ph(U, X, B, C, R)			\
2710   ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X),	\
2711 					      (int)(((C)<<2) | (B)),	\
2712 					      (__v32hf)(__m512h)	\
2713 					      _mm512_setzero_ph(),	\
2714 					      (__mmask32)(U),		\
2715 					      (R)))
2716 
2717 #define _mm_getmant_round_sh(X, Y, C, D, R)				\
2718   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
2719 						 (__v8hf)(__m128h)(Y),	\
2720 						 (int)(((D)<<2) | (C)),	\
2721 						 (__v8hf)(__m128h)	\
2722 						 _mm_setzero_ph (),	\
2723 						 (__mmask8)-1,		\
2724 						 (R)))
2725 
2726 #define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R)			\
2727   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
2728 						 (__v8hf)(__m128h)(Y),	\
2729 						 (int)(((D)<<2) | (C)),	\
2730 						 (__v8hf)(__m128h)(W),	\
2731 						 (__mmask8)(U),		\
2732 						 (R)))
2733 
2734 #define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R)			\
2735   ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X),	\
2736 						 (__v8hf)(__m128h)(Y),	\
2737 						 (int)(((D)<<2) | (C)),	\
2738 						 (__v8hf)(__m128h)	\
2739 						 _mm_setzero_ph(),	\
2740 						 (__mmask8)(U),		\
2741 						 (R)))
2742 
2743 #endif /* __OPTIMIZE__ */
2744 
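/* Usage sketch (editorial illustration, not part of the original header):
   vgetmantph extracts the normalized mantissa of each lane into the
   interval chosen by the norm enumerator, with the sign handled according
   to the sign enumerator.

     __m512h __x = _mm512_set1_ph ((_Float16) 12.0);
     __m512h __m = _mm512_getmant_ph (__x, _MM_MANT_NORM_1_2,
				      _MM_MANT_SIGN_src);   // each lane 1.5
   */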
2745 /* Intrinsics vmovw.  */
2746 extern __inline __m128i
2747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2748 _mm_cvtsi16_si128 (short __A)
2749 {
2750   return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A);
2751 }
2752 
2753 extern __inline short
2754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2755 _mm_cvtsi128_si16 (__m128i __A)
2756 {
2757   return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
2758 }
2759 
2760 /* Intrinsics vmovsh.  */
2761 extern __inline __m128h
2762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2763 _mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
2764 {
2765   return __builtin_ia32_loadsh_mask (__C, __A, __B);
2766 }
2767 
2768 extern __inline __m128h
2769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2770 _mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
2771 {
2772   return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
2773 }
2774 
2775 extern __inline void
2776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777 _mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C)
2778 {
2779   __builtin_ia32_storesh_mask (__A,  __C, __B);
2780 }
2781 
2782 extern __inline __m128h
2783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2784 _mm_move_sh (__m128h __A, __m128h __B)
2785 {
2786   __A[0] = __B[0];
2787   return __A;
2788 }
2789 
2790 extern __inline __m128h
2791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2792 _mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
2793 {
2794   return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
2795 }
2796 
2797 extern __inline __m128h
2798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2799 _mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
2800 {
2801   return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
2802 }
2803 
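/* Usage sketch (editorial illustration, not part of the original header):
   _mm_move_sh forms a result whose element 0 comes from the second operand
   and whose remaining elements come from the first; the masked variants do
   the same under a 1-bit write mask.

     __m128h __a = _mm_set1_ph ((_Float16) 1.0);
     __m128h __b = _mm_set_sh ((_Float16) 7.0);
     __m128h __r = _mm_move_sh (__a, __b);   // { 7.0, 1.0, 1.0, ... }
   */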
2804 /* Intrinsics vcvtph2dq.  */
2805 extern __inline __m512i
2806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807 _mm512_cvtph_epi32 (__m256h __A)
2808 {
2809   return (__m512i)
2810     __builtin_ia32_vcvtph2dq512_mask_round (__A,
2811 					    (__v16si)
2812 					    _mm512_setzero_si512 (),
2813 					    (__mmask16) -1,
2814 					    _MM_FROUND_CUR_DIRECTION);
2815 }
2816 
2817 extern __inline __m512i
2818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2819 _mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
2820 {
2821   return (__m512i)
2822     __builtin_ia32_vcvtph2dq512_mask_round (__C,
2823 					    (__v16si) __A,
2824 					    __B,
2825 					    _MM_FROUND_CUR_DIRECTION);
2826 }
2827 
2828 extern __inline __m512i
2829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2830 _mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B)
2831 {
2832   return (__m512i)
2833     __builtin_ia32_vcvtph2dq512_mask_round (__B,
2834 					    (__v16si)
2835 					    _mm512_setzero_si512 (),
2836 					    __A,
2837 					    _MM_FROUND_CUR_DIRECTION);
2838 }
2839 
2840 #ifdef __OPTIMIZE__
2841 extern __inline __m512i
2842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2843 _mm512_cvt_roundph_epi32 (__m256h __A, int __B)
2844 {
2845   return (__m512i)
2846     __builtin_ia32_vcvtph2dq512_mask_round (__A,
2847 					    (__v16si)
2848 					    _mm512_setzero_si512 (),
2849 					    (__mmask16) -1,
2850 					    __B);
2851 }
2852 
2853 extern __inline __m512i
2854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2855 _mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
2856 {
2857   return (__m512i)
2858     __builtin_ia32_vcvtph2dq512_mask_round (__C,
2859 					    (__v16si) __A,
2860 					    __B,
2861 					    __D);
2862 }
2863 
2864 extern __inline __m512i
2865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2866 _mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
2867 {
2868   return (__m512i)
2869     __builtin_ia32_vcvtph2dq512_mask_round (__B,
2870 					    (__v16si)
2871 					    _mm512_setzero_si512 (),
2872 					    __A,
2873 					    __C);
2874 }
2875 
2876 #else
2877 #define _mm512_cvt_roundph_epi32(A, B)					\
2878   ((__m512i)								\
2879    __builtin_ia32_vcvtph2dq512_mask_round ((A),				\
2880 					   (__v16si)			\
2881 					   _mm512_setzero_si512 (),	\
2882 					   (__mmask16)-1,		\
2883 					   (B)))
2884 
2885 #define _mm512_mask_cvt_roundph_epi32(A, B, C, D)			\
2886   ((__m512i)								\
2887    __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))
2888 
2889 #define _mm512_maskz_cvt_roundph_epi32(A, B, C)				\
2890   ((__m512i)								\
2891    __builtin_ia32_vcvtph2dq512_mask_round ((B),				\
2892 					   (__v16si)			\
2893 					   _mm512_setzero_si512 (),	\
2894 					   (A),				\
2895 					   (C)))
2896 
2897 #endif /* __OPTIMIZE__ */
2898 
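/* Editorial note: a usage sketch for the rounding variants above (the
   function name is made up).  The rounding argument must be a
   compile-time constant, which is why the !__OPTIMIZE__ path falls
   back to macros:

     __m512i
     halves_to_ints (__m256h v)
     {
       // Convert 16 _Float16 values to 32-bit ints, rounding to
       // nearest-even without raising exceptions.
       return _mm512_cvt_roundph_epi32 (v, _MM_FROUND_TO_NEAREST_INT
					   | _MM_FROUND_NO_EXC);
     }
*/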
2899 /* Intrinsics vcvtph2udq.  */
2900 extern __inline __m512i
2901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2902 _mm512_cvtph_epu32 (__m256h __A)
2903 {
2904   return (__m512i)
2905     __builtin_ia32_vcvtph2udq512_mask_round (__A,
2906 					     (__v16si)
2907 					     _mm512_setzero_si512 (),
2908 					     (__mmask16) -1,
2909 					     _MM_FROUND_CUR_DIRECTION);
2910 }
2911 
2912 extern __inline __m512i
2913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2914 _mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
2915 {
2916   return (__m512i)
2917     __builtin_ia32_vcvtph2udq512_mask_round (__C,
2918 					     (__v16si) __A,
2919 					     __B,
2920 					     _MM_FROUND_CUR_DIRECTION);
2921 }
2922 
2923 extern __inline __m512i
2924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2925 _mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B)
2926 {
2927   return (__m512i)
2928     __builtin_ia32_vcvtph2udq512_mask_round (__B,
2929 					     (__v16si)
2930 					     _mm512_setzero_si512 (),
2931 					     __A,
2932 					     _MM_FROUND_CUR_DIRECTION);
2933 }
2934 
2935 #ifdef __OPTIMIZE__
2936 extern __inline __m512i
2937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938 _mm512_cvt_roundph_epu32 (__m256h __A, int __B)
2939 {
2940   return (__m512i)
2941     __builtin_ia32_vcvtph2udq512_mask_round (__A,
2942 					     (__v16si)
2943 					     _mm512_setzero_si512 (),
2944 					     (__mmask16) -1,
2945 					     __B);
2946 }
2947 
2948 extern __inline __m512i
2949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2950 _mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
2951 {
2952   return (__m512i)
2953     __builtin_ia32_vcvtph2udq512_mask_round (__C,
2954 					     (__v16si) __A,
2955 					     __B,
2956 					     __D);
2957 }
2958 
2959 extern __inline __m512i
2960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2961 _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
2962 {
2963   return (__m512i)
2964     __builtin_ia32_vcvtph2udq512_mask_round (__B,
2965 					     (__v16si)
2966 					     _mm512_setzero_si512 (),
2967 					     __A,
2968 					     __C);
2969 }
2970 
2971 #else
2972 #define _mm512_cvt_roundph_epu32(A, B)					\
2973   ((__m512i)								\
2974    __builtin_ia32_vcvtph2udq512_mask_round ((A),			\
2975 					    (__v16si)			\
2976 					    _mm512_setzero_si512 (),	\
2977 					    (__mmask16)-1,		\
2978 					    (B)))
2979 
2980 #define _mm512_mask_cvt_roundph_epu32(A, B, C, D)			\
2981   ((__m512i)								\
2982    __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))
2983 
2984 #define _mm512_maskz_cvt_roundph_epu32(A, B, C)				\
2985   ((__m512i)								\
2986    __builtin_ia32_vcvtph2udq512_mask_round ((B),			\
2987 					    (__v16si)			\
2988 					    _mm512_setzero_si512 (),	\
2989 					    (A),			\
2990 					    (C)))
2991 
2992 #endif /* __OPTIMIZE__ */
2993 
2994 /* Intrinsics vcvttph2dq.  */
2995 extern __inline __m512i
2996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2997 _mm512_cvttph_epi32 (__m256h __A)
2998 {
2999   return (__m512i)
3000     __builtin_ia32_vcvttph2dq512_mask_round (__A,
3001 					     (__v16si)
3002 					     _mm512_setzero_si512 (),
3003 					     (__mmask16) -1,
3004 					     _MM_FROUND_CUR_DIRECTION);
3005 }
3006 
3007 extern __inline __m512i
3008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3009 _mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
3010 {
3011   return (__m512i)
3012     __builtin_ia32_vcvttph2dq512_mask_round (__C,
3013 					     (__v16si) __A,
3014 					     __B,
3015 					     _MM_FROUND_CUR_DIRECTION);
3016 }
3017 
3018 extern __inline __m512i
3019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3020 _mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B)
3021 {
3022   return (__m512i)
3023     __builtin_ia32_vcvttph2dq512_mask_round (__B,
3024 					     (__v16si)
3025 					     _mm512_setzero_si512 (),
3026 					     __A,
3027 					     _MM_FROUND_CUR_DIRECTION);
3028 }
3029 
3030 #ifdef __OPTIMIZE__
3031 extern __inline __m512i
3032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3033 _mm512_cvtt_roundph_epi32 (__m256h __A, int __B)
3034 {
3035   return (__m512i)
3036     __builtin_ia32_vcvttph2dq512_mask_round (__A,
3037 					     (__v16si)
3038 					     _mm512_setzero_si512 (),
3039 					     (__mmask16) -1,
3040 					     __B);
3041 }
3042 
3043 extern __inline __m512i
3044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3045 _mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B,
3046 				__m256h __C, int __D)
3047 {
3048   return (__m512i)
3049     __builtin_ia32_vcvttph2dq512_mask_round (__C,
3050 					     (__v16si) __A,
3051 					     __B,
3052 					     __D);
3053 }
3054 
3055 extern __inline __m512i
3056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3057 _mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
3058 {
3059   return (__m512i)
3060     __builtin_ia32_vcvttph2dq512_mask_round (__B,
3061 					     (__v16si)
3062 					     _mm512_setzero_si512 (),
3063 					     __A,
3064 					     __C);
3065 }
3066 
3067 #else
3068 #define _mm512_cvtt_roundph_epi32(A, B)					\
3069   ((__m512i)								\
3070    __builtin_ia32_vcvttph2dq512_mask_round ((A),			\
3071 					    (__v16si)			\
3072 					    (_mm512_setzero_si512 ()),	\
3073 					    (__mmask16)(-1), (B)))
3074 
3075 #define _mm512_mask_cvtt_roundph_epi32(A, B, C, D)		\
3076   ((__m512i)							\
3077    __builtin_ia32_vcvttph2dq512_mask_round ((C),		\
3078 					    (__v16si)(A),	\
3079 					    (B),		\
3080 					    (D)))
3081 
3082 #define _mm512_maskz_cvtt_roundph_epi32(A, B, C)			\
3083   ((__m512i)								\
3084    __builtin_ia32_vcvttph2dq512_mask_round ((B),			\
3085 					    (__v16si)			\
3086 					    _mm512_setzero_si512 (),	\
3087 					    (A),			\
3088 					    (C)))
3089 
3090 #endif /* __OPTIMIZE__ */
3091 
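/* Editorial note: the vcvttph2dq forms above truncate toward zero,
   whereas the vcvtph2dq forms honour the requested rounding mode.  A
   small illustrative sketch (the function name is made up):

     __m512i
     truncate_halves (__m256h v)
     {
       return _mm512_cvttph_epi32 (v);   // e.g. 2.75 -> 2, -2.75 -> -2
     }
*/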
3092 /* Intrinsics vcvttph2udq.  */
3093 extern __inline __m512i
3094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3095 _mm512_cvttph_epu32 (__m256h __A)
3096 {
3097   return (__m512i)
3098     __builtin_ia32_vcvttph2udq512_mask_round (__A,
3099 					      (__v16si)
3100 					      _mm512_setzero_si512 (),
3101 					      (__mmask16) -1,
3102 					      _MM_FROUND_CUR_DIRECTION);
3103 }
3104 
3105 extern __inline __m512i
3106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3107 _mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
3108 {
3109   return (__m512i)
3110     __builtin_ia32_vcvttph2udq512_mask_round (__C,
3111 					      (__v16si) __A,
3112 					      __B,
3113 					      _MM_FROUND_CUR_DIRECTION);
3114 }
3115 
3116 extern __inline __m512i
3117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3118 _mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B)
3119 {
3120   return (__m512i)
3121     __builtin_ia32_vcvttph2udq512_mask_round (__B,
3122 					      (__v16si)
3123 					      _mm512_setzero_si512 (),
3124 					      __A,
3125 					      _MM_FROUND_CUR_DIRECTION);
3126 }
3127 
3128 #ifdef __OPTIMIZE__
3129 extern __inline __m512i
3130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3131 _mm512_cvtt_roundph_epu32 (__m256h __A, int __B)
3132 {
3133   return (__m512i)
3134     __builtin_ia32_vcvttph2udq512_mask_round (__A,
3135 					      (__v16si)
3136 					      _mm512_setzero_si512 (),
3137 					      (__mmask16) -1,
3138 					      __B);
3139 }
3140 
3141 extern __inline __m512i
3142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3143 _mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B,
3144 				__m256h __C, int __D)
3145 {
3146   return (__m512i)
3147     __builtin_ia32_vcvttph2udq512_mask_round (__C,
3148 					      (__v16si) __A,
3149 					      __B,
3150 					      __D);
3151 }
3152 
3153 extern __inline __m512i
3154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3155 _mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
3156 {
3157   return (__m512i)
3158     __builtin_ia32_vcvttph2udq512_mask_round (__B,
3159 					      (__v16si)
3160 					      _mm512_setzero_si512 (),
3161 					      __A,
3162 					      __C);
3163 }
3164 
3165 #else
3166 #define _mm512_cvtt_roundph_epu32(A, B)					\
3167   ((__m512i)								\
3168    __builtin_ia32_vcvttph2udq512_mask_round ((A),			\
3169 					     (__v16si)			\
3170 					     _mm512_setzero_si512 (),	\
3171 					     (__mmask16)-1,		\
3172 					     (B)))
3173 
3174 #define _mm512_mask_cvtt_roundph_epu32(A, B, C, D)		\
3175   ((__m512i)							\
3176    __builtin_ia32_vcvttph2udq512_mask_round ((C),		\
3177 					     (__v16si)(A),	\
3178 					     (B),		\
3179 					     (D)))
3180 
3181 #define _mm512_maskz_cvtt_roundph_epu32(A, B, C)			\
3182   ((__m512i)								\
3183    __builtin_ia32_vcvttph2udq512_mask_round ((B),			\
3184 					     (__v16si)			\
3185 					     _mm512_setzero_si512 (),	\
3186 					     (A),			\
3187 					     (C)))
3188 
3189 #endif /* __OPTIMIZE__ */
3190 
3191 /* Intrinsics vcvtdq2ph.  */
3192 extern __inline __m256h
3193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3194 _mm512_cvtepi32_ph (__m512i __A)
3195 {
3196   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
3197 						 _mm256_setzero_ph (),
3198 						 (__mmask16) -1,
3199 						 _MM_FROUND_CUR_DIRECTION);
3200 }
3201 
3202 extern __inline __m256h
3203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3204 _mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C)
3205 {
3206   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
3207 						 __A,
3208 						 __B,
3209 						 _MM_FROUND_CUR_DIRECTION);
3210 }
3211 
3212 extern __inline __m256h
3213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3214 _mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B)
3215 {
3216   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
3217 						 _mm256_setzero_ph (),
3218 						 __A,
3219 						 _MM_FROUND_CUR_DIRECTION);
3220 }
3221 
3222 #ifdef __OPTIMIZE__
3223 extern __inline __m256h
3224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3225 _mm512_cvt_roundepi32_ph (__m512i __A, int __B)
3226 {
3227   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
3228 						 _mm256_setzero_ph (),
3229 						 (__mmask16) -1,
3230 						 __B);
3231 }
3232 
3233 extern __inline __m256h
3234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235 _mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
3236 {
3237   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
3238 						 __A,
3239 						 __B,
3240 						 __D);
3241 }
3242 
3243 extern __inline __m256h
3244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245 _mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C)
3246 {
3247   return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
3248 						 _mm256_setzero_ph (),
3249 						 __A,
3250 						 __C);
3251 }
3252 
3253 #else
3254 #define _mm512_cvt_roundepi32_ph(A, B)					\
3255   (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A),		\
3256 					   _mm256_setzero_ph (),	\
3257 					   (__mmask16)-1,		\
3258 					   (B)))
3259 
3260 #define _mm512_mask_cvt_roundepi32_ph(A, B, C, D)		\
3261   (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C),	\
3262 					   (A),			\
3263 					   (B),			\
3264 					   (D)))
3265 
3266 #define _mm512_maskz_cvt_roundepi32_ph(A, B, C)				\
3267   (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B),		\
3268 					   _mm256_setzero_ph (),	\
3269 					   (A),				\
3270 					   (C)))
3271 
3272 #endif /* __OPTIMIZE__ */
3273 
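/* Editorial note: an illustrative sketch for the vcvtdq2ph group
   above (names are made up).  Sixteen 32-bit integers narrow to
   sixteen _Float16 values held in a 256-bit vector:

     __m256h
     ints_to_halves (__m512i v, __mmask16 keep, __m256h fallback)
     {
       // Lanes whose mask bit is clear come from fallback.
       return _mm512_mask_cvtepi32_ph (fallback, keep, v);
     }
*/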
3274 /* Intrinsics vcvtudq2ph.  */
3275 extern __inline __m256h
3276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3277 _mm512_cvtepu32_ph (__m512i __A)
3278 {
3279   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
3280 						  _mm256_setzero_ph (),
3281 						  (__mmask16) -1,
3282 						  _MM_FROUND_CUR_DIRECTION);
3283 }
3284 
3285 extern __inline __m256h
3286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3287 _mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C)
3288 {
3289   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
3290 						  __A,
3291 						  __B,
3292 						  _MM_FROUND_CUR_DIRECTION);
3293 }
3294 
3295 extern __inline __m256h
3296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3297 _mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B)
3298 {
3299   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
3300 						  _mm256_setzero_ph (),
3301 						  __A,
3302 						  _MM_FROUND_CUR_DIRECTION);
3303 }
3304 
3305 #ifdef __OPTIMIZE__
3306 extern __inline __m256h
3307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3308 _mm512_cvt_roundepu32_ph (__m512i __A, int __B)
3309 {
3310   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
3311 						  _mm256_setzero_ph (),
3312 						  (__mmask16) -1,
3313 						  __B);
3314 }
3315 
3316 extern __inline __m256h
3317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3318 _mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
3319 {
3320   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
3321 						  __A,
3322 						  __B,
3323 						  __D);
3324 }
3325 
3326 extern __inline __m256h
3327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3328 _mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C)
3329 {
3330   return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
3331 						  _mm256_setzero_ph (),
3332 						  __A,
3333 						  __C);
3334 }
3335 
3336 #else
3337 #define _mm512_cvt_roundepu32_ph(A, B)					\
3338   (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A),		\
3339 					    _mm256_setzero_ph (),	\
3340 					    (__mmask16)-1,		\
3341 					    B))
3342 
3343 #define _mm512_mask_cvt_roundepu32_ph(A, B, C, D)	\
3344   (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)C,	\
3345 					    A,		\
3346 					    B,		\
3347 					    D))
3348 
3349 #define _mm512_maskz_cvt_roundepu32_ph(A, B, C)				\
3350   (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)B,			\
3351 					    _mm256_setzero_ph (),	\
3352 					    A,				\
3353 					    C))
3354 
3355 #endif /* __OPTIMIZE__ */
3356 
3357 /* Intrinsics vcvtph2qq.  */
3358 extern __inline __m512i
3359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3360 _mm512_cvtph_epi64 (__m128h __A)
3361 {
3362   return __builtin_ia32_vcvtph2qq512_mask_round (__A,
3363 						 _mm512_setzero_si512 (),
3364 						 (__mmask8) -1,
3365 						 _MM_FROUND_CUR_DIRECTION);
3366 }
3367 
3368 extern __inline __m512i
3369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3370 _mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
3371 {
3372   return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B,
3373 						 _MM_FROUND_CUR_DIRECTION);
3374 }
3375 
3376 extern __inline __m512i
3377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3378 _mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
3379 {
3380   return __builtin_ia32_vcvtph2qq512_mask_round (__B,
3381 						 _mm512_setzero_si512 (),
3382 						 __A,
3383 						 _MM_FROUND_CUR_DIRECTION);
3384 }
3385 
3386 #ifdef __OPTIMIZE__
3387 extern __inline __m512i
3388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3389 _mm512_cvt_roundph_epi64 (__m128h __A, int __B)
3390 {
3391   return __builtin_ia32_vcvtph2qq512_mask_round (__A,
3392 						 _mm512_setzero_si512 (),
3393 						 (__mmask8) -1,
3394 						 __B);
3395 }
3396 
3397 extern __inline __m512i
3398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3399 _mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3400 {
3401   return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D);
3402 }
3403 
3404 extern __inline __m512i
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 _mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
3407 {
3408   return __builtin_ia32_vcvtph2qq512_mask_round (__B,
3409 						 _mm512_setzero_si512 (),
3410 						 __A,
3411 						 __C);
3412 }
3413 
3414 #else
3415 #define _mm512_cvt_roundph_epi64(A, B)					\
3416   (__builtin_ia32_vcvtph2qq512_mask_round ((A),				\
3417 					   _mm512_setzero_si512 (),	\
3418 					   (__mmask8)-1,		\
3419 					   (B)))
3420 
3421 #define _mm512_mask_cvt_roundph_epi64(A, B, C, D)		\
3422   (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D)))
3423 
3424 #define _mm512_maskz_cvt_roundph_epi64(A, B, C)				\
3425   (__builtin_ia32_vcvtph2qq512_mask_round ((B),				\
3426 					   _mm512_setzero_si512 (),	\
3427 					   (A),				\
3428 					   (C)))
3429 
3430 #endif /* __OPTIMIZE__ */
3431 
3432 /* Intrinsics vcvtph2uqq.  */
3433 extern __inline __m512i
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 _mm512_cvtph_epu64 (__m128h __A)
3436 {
3437   return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
3438 						  _mm512_setzero_si512 (),
3439 						  (__mmask8) -1,
3440 						  _MM_FROUND_CUR_DIRECTION);
3441 }
3442 
3443 extern __inline __m512i
3444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3445 _mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
3446 {
3447   return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B,
3448 						  _MM_FROUND_CUR_DIRECTION);
3449 }
3450 
3451 extern __inline __m512i
3452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3453 _mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
3454 {
3455   return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
3456 						  _mm512_setzero_si512 (),
3457 						  __A,
3458 						  _MM_FROUND_CUR_DIRECTION);
3459 }
3460 
3461 #ifdef __OPTIMIZE__
3462 
3463 extern __inline __m512i
3464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3465 _mm512_cvt_roundph_epu64 (__m128h __A, int __B)
3466 {
3467   return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
3468 						  _mm512_setzero_si512 (),
3469 						  (__mmask8) -1,
3470 						  __B);
3471 }
3472 
3473 extern __inline __m512i
3474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3475 _mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3476 {
3477   return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D);
3478 }
3479 
3480 extern __inline __m512i
3481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3482 _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
3483 {
3484   return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
3485 						  _mm512_setzero_si512 (),
3486 						  __A,
3487 						  __C);
3488 }
3489 
3490 #else
3491 #define _mm512_cvt_roundph_epu64(A, B)					\
3492   (__builtin_ia32_vcvtph2uqq512_mask_round ((A),			\
3493 					    _mm512_setzero_si512 (),	\
3494 					    (__mmask8)-1,		\
3495 					    (B)))
3496 
3497 #define _mm512_mask_cvt_roundph_epu64(A, B, C, D)			\
3498   (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D)))
3499 
3500 #define _mm512_maskz_cvt_roundph_epu64(A, B, C)				\
3501   (__builtin_ia32_vcvtph2uqq512_mask_round ((B),			\
3502 					    _mm512_setzero_si512 (),	\
3503 					    (A),			\
3504 					    (C)))
3505 
3506 #endif /* __OPTIMIZE__ */
3507 
3508 /* Intrinsics vcvttph2qq.  */
3509 extern __inline __m512i
3510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3511 _mm512_cvttph_epi64 (__m128h __A)
3512 {
3513   return __builtin_ia32_vcvttph2qq512_mask_round (__A,
3514 						  _mm512_setzero_si512 (),
3515 						  (__mmask8) -1,
3516 						  _MM_FROUND_CUR_DIRECTION);
3517 }
3518 
3519 extern __inline __m512i
3520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521 _mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
3522 {
3523   return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B,
3524 						  _MM_FROUND_CUR_DIRECTION);
3525 }
3526 
3527 extern __inline __m512i
3528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3529 _mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
3530 {
3531   return __builtin_ia32_vcvttph2qq512_mask_round (__B,
3532 						  _mm512_setzero_si512 (),
3533 						  __A,
3534 						  _MM_FROUND_CUR_DIRECTION);
3535 }
3536 
3537 #ifdef __OPTIMIZE__
3538 extern __inline __m512i
3539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3540 _mm512_cvtt_roundph_epi64 (__m128h __A, int __B)
3541 {
3542   return __builtin_ia32_vcvttph2qq512_mask_round (__A,
3543 						  _mm512_setzero_si512 (),
3544 						  (__mmask8) -1,
3545 						  __B);
3546 }
3547 
3548 extern __inline __m512i
3549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3550 _mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3551 {
3552   return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D);
3553 }
3554 
3555 extern __inline __m512i
3556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3557 _mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
3558 {
3559   return __builtin_ia32_vcvttph2qq512_mask_round (__B,
3560 						  _mm512_setzero_si512 (),
3561 						  __A,
3562 						  __C);
3563 }
3564 
3565 #else
3566 #define _mm512_cvtt_roundph_epi64(A, B)					\
3567   (__builtin_ia32_vcvttph2qq512_mask_round ((A),			\
3568 					    _mm512_setzero_si512 (),	\
3569 					    (__mmask8)-1,		\
3570 					    (B)))
3571 
3572 #define _mm512_mask_cvtt_roundph_epi64(A, B, C, D)			\
3573   __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D))
3574 
3575 #define _mm512_maskz_cvtt_roundph_epi64(A, B, C)			\
3576   (__builtin_ia32_vcvttph2qq512_mask_round ((B),			\
3577 					    _mm512_setzero_si512 (),	\
3578 					    (A),			\
3579 					    (C)))
3580 
3581 #endif /* __OPTIMIZE__ */
3582 
3583 /* Intrinsics vcvttph2uqq.  */
3584 extern __inline __m512i
3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586 _mm512_cvttph_epu64 (__m128h __A)
3587 {
3588   return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
3589 						   _mm512_setzero_si512 (),
3590 						   (__mmask8) -1,
3591 						   _MM_FROUND_CUR_DIRECTION);
3592 }
3593 
3594 extern __inline __m512i
3595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3596 _mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
3597 {
3598   return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B,
3599 						   _MM_FROUND_CUR_DIRECTION);
3600 }
3601 
3602 extern __inline __m512i
3603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3604 _mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
3605 {
3606   return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
3607 						   _mm512_setzero_si512 (),
3608 						   __A,
3609 						   _MM_FROUND_CUR_DIRECTION);
3610 }
3611 
3612 #ifdef __OPTIMIZE__
3613 extern __inline __m512i
3614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3615 _mm512_cvtt_roundph_epu64 (__m128h __A, int __B)
3616 {
3617   return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
3618 						   _mm512_setzero_si512 (),
3619 						   (__mmask8) -1,
3620 						   __B);
3621 }
3622 
3623 extern __inline __m512i
3624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3625 _mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3626 {
3627   return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D);
3628 }
3629 
3630 extern __inline __m512i
3631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632 _mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
3633 {
3634   return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
3635 						   _mm512_setzero_si512 (),
3636 						   __A,
3637 						   __C);
3638 }
3639 
3640 #else
3641 #define _mm512_cvtt_roundph_epu64(A, B)					\
3642   (__builtin_ia32_vcvttph2uqq512_mask_round ((A),			\
3643 					     _mm512_setzero_si512 (),	\
3644 					     (__mmask8)-1,		\
3645 					     (B)))
3646 
3647 #define _mm512_mask_cvtt_roundph_epu64(A, B, C, D)			\
3648   __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D))
3649 
3650 #define _mm512_maskz_cvtt_roundph_epu64(A, B, C)			\
3651   (__builtin_ia32_vcvttph2uqq512_mask_round ((B),			\
3652 					     _mm512_setzero_si512 (),	\
3653 					     (A),			\
3654 					     (C)))
3655 
3656 #endif /* __OPTIMIZE__ */
3657 
3658 /* Intrinsics vcvtqq2ph.  */
3659 extern __inline __m128h
3660   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 _mm512_cvtepi64_ph (__m512i __A)
3662 {
3663   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
3664 						 _mm_setzero_ph (),
3665 						 (__mmask8) -1,
3666 						 _MM_FROUND_CUR_DIRECTION);
3667 }
3668 
3669 extern __inline __m128h
3670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671 _mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C)
3672 {
3673   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
3674 						 __A,
3675 						 __B,
3676 						 _MM_FROUND_CUR_DIRECTION);
3677 }
3678 
3679 extern __inline __m128h
3680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3681 _mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B)
3682 {
3683   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
3684 						 _mm_setzero_ph (),
3685 						 __A,
3686 						 _MM_FROUND_CUR_DIRECTION);
3687 }
3688 
3689 #ifdef __OPTIMIZE__
3690 extern __inline __m128h
3691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3692 _mm512_cvt_roundepi64_ph (__m512i __A, int __B)
3693 {
3694   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
3695 						 _mm_setzero_ph (),
3696 						 (__mmask8) -1,
3697 						 __B);
3698 }
3699 
3700 extern __inline __m128h
3701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3702 _mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
3703 {
3704   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
3705 						 __A,
3706 						 __B,
3707 						 __D);
3708 }
3709 
3710 extern __inline __m128h
3711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3712 _mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C)
3713 {
3714   return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
3715 						 _mm_setzero_ph (),
3716 						 __A,
3717 						 __C);
3718 }
3719 
3720 #else
3721 #define _mm512_cvt_roundepi64_ph(A, B)				\
3722   (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A),		\
3723 					   _mm_setzero_ph (),	\
3724 					   (__mmask8)-1,	\
3725 					   (B)))
3726 
3727 #define _mm512_mask_cvt_roundepi64_ph(A, B, C, D)			\
3728   (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
3729 
3730 #define _mm512_maskz_cvt_roundepi64_ph(A, B, C)			\
3731   (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B),		\
3732 					   _mm_setzero_ph (),	\
3733 					   (A),			\
3734 					   (C)))
3735 
3736 #endif /* __OPTIMIZE__ */
3737 
3738 /* Intrinsics vcvtuqq2ph.  */
3739 extern __inline __m128h
3740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3741 _mm512_cvtepu64_ph (__m512i __A)
3742 {
3743   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
3744 						  _mm_setzero_ph (),
3745 						  (__mmask8) -1,
3746 						  _MM_FROUND_CUR_DIRECTION);
3747 }
3748 
3749 extern __inline __m128h
3750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3751 _mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C)
3752 {
3753   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
3754 						  __A,
3755 						  __B,
3756 						  _MM_FROUND_CUR_DIRECTION);
3757 }
3758 
3759 extern __inline __m128h
3760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3761 _mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B)
3762 {
3763   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
3764 						  _mm_setzero_ph (),
3765 						  __A,
3766 						  _MM_FROUND_CUR_DIRECTION);
3767 }
3768 
3769 #ifdef __OPTIMIZE__
3770 extern __inline __m128h
3771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3772 _mm512_cvt_roundepu64_ph (__m512i __A, int __B)
3773 {
3774   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
3775 						  _mm_setzero_ph (),
3776 						  (__mmask8) -1,
3777 						  __B);
3778 }
3779 
3780 extern __inline __m128h
3781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782 _mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
3783 {
3784   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
3785 						  __A,
3786 						  __B,
3787 						  __D);
3788 }
3789 
3790 extern __inline __m128h
3791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3792 _mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C)
3793 {
3794   return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
3795 						  _mm_setzero_ph (),
3796 						  __A,
3797 						  __C);
3798 }
3799 
3800 #else
3801 #define _mm512_cvt_roundepu64_ph(A, B)				\
3802   (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A),	\
3803 					    _mm_setzero_ph (),	\
3804 					    (__mmask8)-1,	\
3805 					    (B)))
3806 
3807 #define _mm512_mask_cvt_roundepu64_ph(A, B, C, D)			\
3808   (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
3809 
3810 #define _mm512_maskz_cvt_roundepu64_ph(A, B, C)			\
3811   (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B),	\
3812 					    _mm_setzero_ph (),	\
3813 					    (A),		\
3814 					    (C)))
3815 
3816 #endif /* __OPTIMIZE__ */
3817 
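/* Editorial note: the qq <-> ph conversions above handle only eight
   elements, so the half-precision side is a 128-bit __m128h.  A usage
   sketch (the function name is made up):

     __m128h
     quads_to_halves (__m512i v)
     {
       return _mm512_cvtepu64_ph (v);   // 8 unsigned 64-bit ints -> 8 halves
     }
*/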
3818 /* Intrinsics vcvtph2w.  */
3819 extern __inline __m512i
3820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3821 _mm512_cvtph_epi16 (__m512h __A)
3822 {
3823   return (__m512i)
3824     __builtin_ia32_vcvtph2w512_mask_round (__A,
3825 					      (__v32hi)
3826 					      _mm512_setzero_si512 (),
3827 					      (__mmask32) -1,
3828 					      _MM_FROUND_CUR_DIRECTION);
3829 }
3830 
3831 extern __inline __m512i
3832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3833 _mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
3834 {
3835   return (__m512i)
3836     __builtin_ia32_vcvtph2w512_mask_round (__C,
3837 					      (__v32hi) __A,
3838 					      __B,
3839 					      _MM_FROUND_CUR_DIRECTION);
3840 }
3841 
3842 extern __inline __m512i
3843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3844 _mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B)
3845 {
3846   return (__m512i)
3847     __builtin_ia32_vcvtph2w512_mask_round (__B,
3848 					      (__v32hi)
3849 					      _mm512_setzero_si512 (),
3850 					      __A,
3851 					      _MM_FROUND_CUR_DIRECTION);
3852 }
3853 
3854 #ifdef __OPTIMIZE__
3855 extern __inline __m512i
3856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3857 _mm512_cvt_roundph_epi16 (__m512h __A, int __B)
3858 {
3859   return (__m512i)
3860     __builtin_ia32_vcvtph2w512_mask_round (__A,
3861 					      (__v32hi)
3862 					      _mm512_setzero_si512 (),
3863 					      (__mmask32) -1,
3864 					      __B);
3865 }
3866 
3867 extern __inline __m512i
3868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3869 _mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
3870 {
3871   return (__m512i)
3872     __builtin_ia32_vcvtph2w512_mask_round (__C,
3873 					      (__v32hi) __A,
3874 					      __B,
3875 					      __D);
3876 }
3877 
3878 extern __inline __m512i
3879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3880 _mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
3881 {
3882   return (__m512i)
3883     __builtin_ia32_vcvtph2w512_mask_round (__B,
3884 					      (__v32hi)
3885 					      _mm512_setzero_si512 (),
3886 					      __A,
3887 					      __C);
3888 }
3889 
3890 #else
3891 #define _mm512_cvt_roundph_epi16(A, B)					\
3892   ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A),		\
3893 						      (__v32hi)		\
3894 						      _mm512_setzero_si512 (), \
3895 						      (__mmask32)-1,	\
3896 						      (B)))
3897 
3898 #define _mm512_mask_cvt_roundph_epi16(A, B, C, D)			\
3899   ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C),		\
3900 						      (__v32hi)(A),	\
3901 						      (B),		\
3902 						      (D)))
3903 
3904 #define _mm512_maskz_cvt_roundph_epi16(A, B, C)				\
3905   ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B),		\
3906 						      (__v32hi)		\
3907 						      _mm512_setzero_si512 (), \
3908 						      (A),		\
3909 						      (C)))
3910 
3911 #endif /* __OPTIMIZE__ */
3912 
3913 /* Intrinsics vcvtph2uw.  */
3914 extern __inline __m512i
3915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3916 _mm512_cvtph_epu16 (__m512h __A)
3917 {
3918   return (__m512i)
3919     __builtin_ia32_vcvtph2uw512_mask_round (__A,
3920 					       (__v32hi)
3921 					       _mm512_setzero_si512 (),
3922 					       (__mmask32) -1,
3923 					       _MM_FROUND_CUR_DIRECTION);
3924 }
3925 
3926 extern __inline __m512i
3927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3928 _mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
3929 {
3930   return (__m512i)
3931     __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B,
3932 					       _MM_FROUND_CUR_DIRECTION);
3933 }
3934 
3935 extern __inline __m512i
3936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3937 _mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B)
3938 {
3939   return (__m512i)
3940     __builtin_ia32_vcvtph2uw512_mask_round (__B,
3941 					       (__v32hi)
3942 					       _mm512_setzero_si512 (),
3943 					       __A,
3944 					       _MM_FROUND_CUR_DIRECTION);
3945 }
3946 
3947 #ifdef __OPTIMIZE__
3948 extern __inline __m512i
3949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3950 _mm512_cvt_roundph_epu16 (__m512h __A, int __B)
3951 {
3952   return (__m512i)
3953     __builtin_ia32_vcvtph2uw512_mask_round (__A,
3954 					       (__v32hi)
3955 					       _mm512_setzero_si512 (),
3956 					       (__mmask32) -1,
3957 					       __B);
3958 }
3959 
3960 extern __inline __m512i
3961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962 _mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
3963 {
3964   return (__m512i)
3965     __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D);
3966 }
3967 
3968 extern __inline __m512i
3969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3970 _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
3971 {
3972   return (__m512i)
3973     __builtin_ia32_vcvtph2uw512_mask_round (__B,
3974 					       (__v32hi)
3975 					       _mm512_setzero_si512 (),
3976 					       __A,
3977 					       __C);
3978 }
3979 
3980 #else
3981 #define _mm512_cvt_roundph_epu16(A, B)					\
3982   ((__m512i)								\
3983    __builtin_ia32_vcvtph2uw512_mask_round ((A),			\
3984 					      (__v32hi)			\
3985 					      _mm512_setzero_si512 (),	\
3986 					      (__mmask32)-1, (B)))
3987 
3988 #define _mm512_mask_cvt_roundph_epu16(A, B, C, D)			\
3989   ((__m512i)								\
3990    __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
3991 
3992 #define _mm512_maskz_cvt_roundph_epu16(A, B, C)				\
3993   ((__m512i)								\
3994    __builtin_ia32_vcvtph2uw512_mask_round ((B),			\
3995 					      (__v32hi)			\
3996 					      _mm512_setzero_si512 (),	\
3997 					      (A),			\
3998 					      (C)))
3999 
4000 #endif /* __OPTIMIZE__ */
4001 
4002 /* Intrinsics vcvttph2w.  */
4003 extern __inline __m512i
4004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4005 _mm512_cvttph_epi16 (__m512h __A)
4006 {
4007   return (__m512i)
4008     __builtin_ia32_vcvttph2w512_mask_round (__A,
4009 					    (__v32hi)
4010 					    _mm512_setzero_si512 (),
4011 					    (__mmask32) -1,
4012 					    _MM_FROUND_CUR_DIRECTION);
4013 }
4014 
4015 extern __inline __m512i
4016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4017 _mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
4018 {
4019   return (__m512i)
4020     __builtin_ia32_vcvttph2w512_mask_round (__C,
4021 					    (__v32hi) __A,
4022 					    __B,
4023 					    _MM_FROUND_CUR_DIRECTION);
4024 }
4025 
4026 extern __inline __m512i
4027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4028 _mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B)
4029 {
4030   return (__m512i)
4031     __builtin_ia32_vcvttph2w512_mask_round (__B,
4032 					    (__v32hi)
4033 					    _mm512_setzero_si512 (),
4034 					    __A,
4035 					    _MM_FROUND_CUR_DIRECTION);
4036 }
4037 
4038 #ifdef __OPTIMIZE__
4039 extern __inline __m512i
4040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4041 _mm512_cvtt_roundph_epi16 (__m512h __A, int __B)
4042 {
4043   return (__m512i)
4044     __builtin_ia32_vcvttph2w512_mask_round (__A,
4045 					    (__v32hi)
4046 					    _mm512_setzero_si512 (),
4047 					    (__mmask32) -1,
4048 					    __B);
4049 }
4050 
4051 extern __inline __m512i
4052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4053 _mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B,
4054 				__m512h __C, int __D)
4055 {
4056   return (__m512i)
4057     __builtin_ia32_vcvttph2w512_mask_round (__C,
4058 					    (__v32hi) __A,
4059 					    __B,
4060 					    __D);
4061 }
4062 
4063 extern __inline __m512i
4064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065 _mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
4066 {
4067   return (__m512i)
4068     __builtin_ia32_vcvttph2w512_mask_round (__B,
4069 					    (__v32hi)
4070 					    _mm512_setzero_si512 (),
4071 					    __A,
4072 					    __C);
4073 }
4074 
4075 #else
4076 #define _mm512_cvtt_roundph_epi16(A, B)				    \
4077   ((__m512i)							    \
4078    __builtin_ia32_vcvttph2w512_mask_round ((A),			    \
4079 					   (__v32hi)		    \
4080 					   _mm512_setzero_si512 (), \
4081 					   (__mmask32)-1,	    \
4082 					   (B)))
4083 
4084 #define _mm512_mask_cvtt_roundph_epi16(A, B, C, D)		\
4085   ((__m512i)							\
4086    __builtin_ia32_vcvttph2w512_mask_round ((C),			\
4087 					   (__v32hi)(A),	\
4088 					   (B),			\
4089 					   (D)))
4090 
4091 #define _mm512_maskz_cvtt_roundph_epi16(A, B, C)		    \
4092   ((__m512i)							    \
4093    __builtin_ia32_vcvttph2w512_mask_round ((B),			    \
4094 					   (__v32hi)		    \
4095 					   _mm512_setzero_si512 (), \
4096 					   (A),			    \
4097 					   (C)))
4098 
4099 #endif /* __OPTIMIZE__ */
4100 
4101 /* Intrinsics vcvttph2uw.  */
4102 extern __inline __m512i
4103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4104 _mm512_cvttph_epu16 (__m512h __A)
4105 {
4106   return (__m512i)
4107     __builtin_ia32_vcvttph2uw512_mask_round (__A,
4108 					     (__v32hi)
4109 					     _mm512_setzero_si512 (),
4110 					     (__mmask32) -1,
4111 					     _MM_FROUND_CUR_DIRECTION);
4112 }
4113 
4114 extern __inline __m512i
4115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4116 _mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
4117 {
4118   return (__m512i)
4119     __builtin_ia32_vcvttph2uw512_mask_round (__C,
4120 					     (__v32hi) __A,
4121 					     __B,
4122 					     _MM_FROUND_CUR_DIRECTION);
4123 }
4124 
4125 extern __inline __m512i
4126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4127 _mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B)
4128 {
4129   return (__m512i)
4130     __builtin_ia32_vcvttph2uw512_mask_round (__B,
4131 					     (__v32hi)
4132 					     _mm512_setzero_si512 (),
4133 					     __A,
4134 					     _MM_FROUND_CUR_DIRECTION);
4135 }
4136 
4137 #ifdef __OPTIMIZE__
4138 extern __inline __m512i
4139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4140 _mm512_cvtt_roundph_epu16 (__m512h __A, int __B)
4141 {
4142   return (__m512i)
4143     __builtin_ia32_vcvttph2uw512_mask_round (__A,
4144 					     (__v32hi)
4145 					     _mm512_setzero_si512 (),
4146 					     (__mmask32) -1,
4147 					     __B);
4148 }
4149 
4150 extern __inline __m512i
4151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4152 _mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B,
4153 				__m512h __C, int __D)
4154 {
4155   return (__m512i)
4156     __builtin_ia32_vcvttph2uw512_mask_round (__C,
4157 					     (__v32hi) __A,
4158 					     __B,
4159 					     __D);
4160 }
4161 
4162 extern __inline __m512i
4163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164 _mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
4165 {
4166   return (__m512i)
4167     __builtin_ia32_vcvttph2uw512_mask_round (__B,
4168 					     (__v32hi)
4169 					     _mm512_setzero_si512 (),
4170 					     __A,
4171 					     __C);
4172 }
4173 
4174 #else
4175 #define _mm512_cvtt_roundph_epu16(A, B)				     \
4176   ((__m512i)							     \
4177    __builtin_ia32_vcvttph2uw512_mask_round ((A),		     \
4178 					    (__v32hi)		     \
4179 					    _mm512_setzero_si512 (), \
4180 					    (__mmask32)-1,	     \
4181 					    (B)))
4182 
4183 #define _mm512_mask_cvtt_roundph_epu16(A, B, C, D)		\
4184   ((__m512i)							\
4185    __builtin_ia32_vcvttph2uw512_mask_round ((C),		\
4186 					    (__v32hi)(A),	\
4187 					    (B),		\
4188 					    (D)))
4189 
4190 #define _mm512_maskz_cvtt_roundph_epu16(A, B, C)		     \
4191   ((__m512i)							     \
4192    __builtin_ia32_vcvttph2uw512_mask_round ((B),		     \
4193 					    (__v32hi)		     \
4194 					    _mm512_setzero_si512 (), \
4195 					    (A),		     \
4196 					    (C)))
4197 
4198 #endif /* __OPTIMIZE__ */
4199 
4200 /* Intrinsics vcvtw2ph.  */
4201 extern __inline __m512h
4202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203 _mm512_cvtepi16_ph (__m512i __A)
4204 {
4205   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
4206 						_mm512_setzero_ph (),
4207 						(__mmask32) -1,
4208 						_MM_FROUND_CUR_DIRECTION);
4209 }
4210 
4211 extern __inline __m512h
4212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4213 _mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
4214 {
4215   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
4216 						__A,
4217 						__B,
4218 						_MM_FROUND_CUR_DIRECTION);
4219 }
4220 
4221 extern __inline __m512h
4222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4223 _mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
4224 {
4225   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
4226 						_mm512_setzero_ph (),
4227 						__A,
4228 						_MM_FROUND_CUR_DIRECTION);
4229 }
4230 
4231 #ifdef __OPTIMIZE__
4232 extern __inline __m512h
4233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234 _mm512_cvt_roundepi16_ph (__m512i __A, int __B)
4235 {
4236   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
4237 						_mm512_setzero_ph (),
4238 						(__mmask32) -1,
4239 						__B);
4240 }
4241 
4242 extern __inline __m512h
4243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244 _mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
4245 {
4246   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
4247 						__A,
4248 						__B,
4249 						__D);
4250 }
4251 
4252 extern __inline __m512h
4253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4254 _mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
4255 {
4256   return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
4257 						_mm512_setzero_ph (),
4258 						__A,
4259 						__C);
4260 }
4261 
4262 #else
4263 #define _mm512_cvt_roundepi16_ph(A, B)				\
4264   (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A),		\
4265 					  _mm512_setzero_ph (),	\
4266 					  (__mmask32)-1,	\
4267 					  (B)))
4268 
4269 #define _mm512_mask_cvt_roundepi16_ph(A, B, C, D)	\
4270   (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C),	\
4271 					  (A),		\
4272 					  (B),		\
4273 					  (D)))
4274 
4275 #define _mm512_maskz_cvt_roundepi16_ph(A, B, C)			\
4276   (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B),		\
4277 					  _mm512_setzero_ph (),	\
4278 					  (A),			\
4279 					  (C)))
4280 
4281 #endif /* __OPTIMIZE__ */
4282 
4283 /* Intrinsics vcvtuw2ph.  */
4284   extern __inline __m512h
4285   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4286   _mm512_cvtepu16_ph (__m512i __A)
4287   {
4288     return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
4289 						   _mm512_setzero_ph (),
4290 						   (__mmask32) -1,
4291 						   _MM_FROUND_CUR_DIRECTION);
4292   }
4293 
4294 extern __inline __m512h
4295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4296 _mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
4297 {
4298   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
4299 						 __A,
4300 						 __B,
4301 						 _MM_FROUND_CUR_DIRECTION);
4302 }
4303 
4304 extern __inline __m512h
4305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4306 _mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
4307 {
4308   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
4309 						 _mm512_setzero_ph (),
4310 						 __A,
4311 						 _MM_FROUND_CUR_DIRECTION);
4312 }
4313 
4314 #ifdef __OPTIMIZE__
4315 extern __inline __m512h
4316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4317 _mm512_cvt_roundepu16_ph (__m512i __A, int __B)
4318 {
4319   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
4320 						 _mm512_setzero_ph (),
4321 						 (__mmask32) -1,
4322 						 __B);
4323 }
4324 
4325 extern __inline __m512h
4326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4327 _mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
4328 {
4329   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
4330 						 __A,
4331 						 __B,
4332 						 __D);
4333 }
4334 
4335 extern __inline __m512h
4336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4337 _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
4338 {
4339   return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
4340 						 _mm512_setzero_ph (),
4341 						 __A,
4342 						 __C);
4343 }
4344 
4345 #else
4346 #define _mm512_cvt_roundepu16_ph(A, B)					\
4347   (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A),		\
4348 					   _mm512_setzero_ph (),	\
4349 					   (__mmask32)-1,		\
4350 					   (B)))
4351 
4352 #define _mm512_mask_cvt_roundepu16_ph(A, B, C, D)		\
4353   (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C),	\
4354 					   (A),			\
4355 					   (B),			\
4356 					   (D)))
4357 
4358 #define _mm512_maskz_cvt_roundepu16_ph(A, B, C)				\
4359   (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B),		\
4360 					   _mm512_setzero_ph (),	\
4361 					   (A),				\
4362 					   (C)))
4363 
4364 #endif /* __OPTIMIZE__ */
4365 
4366 /* Intrinsics vcvtsh2si, vcvtsh2us.  */
4367 extern __inline int
4368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4369 _mm_cvtsh_i32 (__m128h __A)
4370 {
4371   return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
4372 }
4373 
4374 extern __inline unsigned
4375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4376 _mm_cvtsh_u32 (__m128h __A)
4377 {
4378   return (int) __builtin_ia32_vcvtsh2usi32_round (__A,
4379 						  _MM_FROUND_CUR_DIRECTION);
4380 }
4381 
4382 #ifdef __OPTIMIZE__
4383 extern __inline int
4384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4385 _mm_cvt_roundsh_i32 (__m128h __A, const int __R)
4386 {
4387   return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
4388 }
4389 
4390 extern __inline unsigned
4391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4392 _mm_cvt_roundsh_u32 (__m128h __A, const int __R)
4393 {
4394   return (unsigned) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
4395 }
4396 
4397 #else
4398 #define _mm_cvt_roundsh_i32(A, B)		\
4399   ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
4400 #define _mm_cvt_roundsh_u32(A, B)		\
4401   ((unsigned)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
4402 
4403 #endif /* __OPTIMIZE__ */
4404 
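/* Editorial usage sketch (not part of the original header): reading the low
   _Float16 element back as a 32-bit integer.  _mm_set_sh is assumed to be
   defined earlier in this header.

     __m128h  __x = _mm_set_sh (42.5f);
     int      __i = _mm_cvtsh_i32 (__x);
     unsigned __u = _mm_cvt_roundsh_u32 (__x, _MM_FROUND_TO_ZERO
					      | _MM_FROUND_NO_EXC);  */
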
4405 #ifdef __x86_64__
4406 extern __inline long long
4407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4408 _mm_cvtsh_i64 (__m128h __A)
4409 {
4410   return (long long)
4411     __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
4412 }
4413 
4414 extern __inline unsigned long long
4415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4416 _mm_cvtsh_u64 (__m128h __A)
4417 {
4418   return (unsigned long long)
4419     __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
4420 }
4421 
4422 #ifdef __OPTIMIZE__
4423 extern __inline long long
4424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4425 _mm_cvt_roundsh_i64 (__m128h __A, const int __R)
4426 {
4427   return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
4428 }
4429 
4430 extern __inline unsigned long long
4431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4432 _mm_cvt_roundsh_u64 (__m128h __A, const int __R)
4433 {
4434   return (unsigned long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
4435 }
4436 
4437 #else
4438 #define _mm_cvt_roundsh_i64(A, B)			\
4439   ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
4440 #define _mm_cvt_roundsh_u64(A, B)			\
4441   ((unsigned long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
4442 
4443 #endif /* __OPTIMIZE__ */
4444 #endif /* __x86_64__ */
4445 
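/* Editorial note (not part of the original header): the 64-bit scalar
   conversions above are only provided for x86-64 targets, hence the
   __x86_64__ guard.  A minimal sketch, assuming _mm_set_sh from earlier in
   this header:

     #ifdef __x86_64__
     long long __v = _mm_cvtsh_i64 (_mm_set_sh (3.0f));
     #endif  */
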
4446 /* Intrinsics vcvttsh2si, vcvttsh2us.  */
4447 extern __inline int
4448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449 _mm_cvttsh_i32 (__m128h __A)
4450 {
4451   return (int)
4452     __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
4453 }
4454 
4455 extern __inline unsigned
4456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4457 _mm_cvttsh_u32 (__m128h __A)
4458 {
4459   return (unsigned)
4460     __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
4461 }
4462 
4463 #ifdef __OPTIMIZE__
4464 extern __inline int
4465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4466 _mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
4467 {
4468   return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
4469 }
4470 
4471 extern __inline unsigned
4472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4473 _mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
4474 {
4475   return (unsigned) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
4476 }
4477 
4478 #else
4479 #define _mm_cvtt_roundsh_i32(A, B)		\
4480   ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
4481 #define _mm_cvtt_roundsh_u32(A, B)		\
4482   ((unsigned)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
4483 
4484 #endif /* __OPTIMIZE__ */
4485 
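/* Editorial usage sketch (not part of the original header): the vcvttsh2si
   forms truncate toward zero regardless of MXCSR, matching a C cast from
   _Float16 to an integer type, whereas the vcvtsh2si forms honour the
   current rounding mode.  Assuming _mm_set_sh from earlier in this header:

     __m128h __x = _mm_set_sh (2.9f);
     int __t = _mm_cvttsh_i32 (__x);    yields 2 (truncated)
     int __r = _mm_cvtsh_i32 (__x);     yields 3 under round-to-nearest  */
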
4486 #ifdef __x86_64__
4487 extern __inline long long
4488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4489 _mm_cvttsh_i64 (__m128h __A)
4490 {
4491   return (long long)
4492     __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
4493 }
4494 
4495 extern __inline unsigned long long
4496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4497 _mm_cvttsh_u64 (__m128h __A)
4498 {
4499   return (unsigned long long)
4500     __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
4501 }
4502 
4503 #ifdef __OPTIMIZE__
4504 extern __inline long long
4505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4506 _mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
4507 {
4508   return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
4509 }
4510 
4511 extern __inline unsigned long long
4512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4513 _mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
4514 {
4515   return (unsigned long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
4516 }
4517 
4518 #else
4519 #define _mm_cvtt_roundsh_i64(A, B)			\
4520   ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
4521 #define _mm_cvtt_roundsh_u64(A, B)			\
4522   ((unsigned long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
4523 
4524 #endif /* __OPTIMIZE__ */
4525 #endif /* __x86_64__ */
4526 
4527 /* Intrinsics vcvtsi2sh, vcvtusi2sh.  */
4528 extern __inline __m128h
4529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4530 _mm_cvti32_sh (__m128h __A, int __B)
4531 {
4532   return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4533 }
4534 
4535 extern __inline __m128h
4536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4537 _mm_cvtu32_sh (__m128h __A, unsigned int __B)
4538 {
4539   return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4540 }
4541 
4542 #ifdef __OPTIMIZE__
4543 extern __inline __m128h
4544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4545 _mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
4546 {
4547   return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
4548 }
4549 
4550 extern __inline __m128h
4551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4552 _mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
4553 {
4554   return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
4555 }
4556 
4557 #else
4558 #define _mm_cvt_roundi32_sh(A, B, C)		\
4559   (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
4560 #define _mm_cvt_roundu32_sh(A, B, C)		\
4561   (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
4562 
4563 #endif /* __OPTIMIZE__ */
4564 
4565 #ifdef __x86_64__
4566 extern __inline __m128h
4567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4568 _mm_cvti64_sh (__m128h __A, long long __B)
4569 {
4570   return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4571 }
4572 
4573 extern __inline __m128h
4574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4575 _mm_cvtu64_sh (__m128h __A, unsigned long long __B)
4576 {
4577   return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4578 }
4579 
4580 #ifdef __OPTIMIZE__
4581 extern __inline __m128h
4582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4583 _mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
4584 {
4585   return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
4586 }
4587 
4588 extern __inline __m128h
4589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4590 _mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
4591 {
4592   return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
4593 }
4594 
4595 #else
4596 #define _mm_cvt_roundi64_sh(A, B, C)		\
4597   (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
4598 #define _mm_cvt_roundu64_sh(A, B, C)		\
4599   (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
4600 
4601 #endif /* __OPTIMIZE__ */
4602 #endif /* __x86_64__ */
4603 
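/* Editorial usage sketch (not part of the original header): converting an
   integer to _Float16 and inserting it into the low element, with the
   upper seven elements copied from the first operand.

     __m128h __src = _mm_setzero_ph ();
     __m128h __a = _mm_cvti32_sh (__src, -7);
     __m128h __b = _mm_cvtu32_sh (__src, 60000u);  */
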
4604 /* Intrinsics vcvtph2pd.  */
4605 extern __inline __m512d
4606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4607 _mm512_cvtph_pd (__m128h __A)
4608 {
4609   return __builtin_ia32_vcvtph2pd512_mask_round (__A,
4610 						 _mm512_setzero_pd (),
4611 						 (__mmask8) -1,
4612 						 _MM_FROUND_CUR_DIRECTION);
4613 }
4614 
4615 extern __inline __m512d
4616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4617 _mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C)
4618 {
4619   return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B,
4620 						 _MM_FROUND_CUR_DIRECTION);
4621 }
4622 
4623 extern __inline __m512d
4624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4625 _mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
4626 {
4627   return __builtin_ia32_vcvtph2pd512_mask_round (__B,
4628 						 _mm512_setzero_pd (),
4629 						 __A,
4630 						 _MM_FROUND_CUR_DIRECTION);
4631 }
4632 
4633 #ifdef __OPTIMIZE__
4634 extern __inline __m512d
4635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4636 _mm512_cvt_roundph_pd (__m128h __A, int __B)
4637 {
4638   return __builtin_ia32_vcvtph2pd512_mask_round (__A,
4639 						 _mm512_setzero_pd (),
4640 						 (__mmask8) -1,
4641 						 __B);
4642 }
4643 
4644 extern __inline __m512d
4645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4646 _mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D)
4647 {
4648   return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D);
4649 }
4650 
4651 extern __inline __m512d
4652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4653 _mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C)
4654 {
4655   return __builtin_ia32_vcvtph2pd512_mask_round (__B,
4656 						 _mm512_setzero_pd (),
4657 						 __A,
4658 						 __C);
4659 }
4660 
4661 #else
4662 #define _mm512_cvt_roundph_pd(A, B)					\
4663   (__builtin_ia32_vcvtph2pd512_mask_round ((A),			\
4664 					   _mm512_setzero_pd (),	\
4665 					   (__mmask8)-1,		\
4666 					   (B)))
4667 
4668 #define _mm512_mask_cvt_roundph_pd(A, B, C, D)				\
4669   (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))
4670 
4671 #define _mm512_maskz_cvt_roundph_pd(A, B, C)				\
4672   (__builtin_ia32_vcvtph2pd512_mask_round ((B),			\
4673 					   _mm512_setzero_pd (),	\
4674 					   (A),			\
4675 					   (C)))
4676 
4677 #endif /* __OPTIMIZE__ */
4678 
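/* Editorial usage sketch (not part of the original header): widening eight
   _Float16 values to double precision, including a merge-masked form.
   _mm_set1_ph (earlier in this header) and _mm512_set1_pd (AVX-512F) are
   assumed to be available via <immintrin.h>.

     __m128h __h   = _mm_set1_ph (1.5f);
     __m512d __all = _mm512_cvtph_pd (__h);
     __m512d __mrg = _mm512_mask_cvtph_pd (_mm512_set1_pd (-1.0), 0x0f, __h);  */
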
4679 /* Intrinsics vcvtph2psx.  */
4680 extern __inline __m512
4681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682 _mm512_cvtxph_ps (__m256h __A)
4683 {
4684   return __builtin_ia32_vcvtph2psx512_mask_round (__A,
4685 						  _mm512_setzero_ps (),
4686 						  (__mmask16) -1,
4687 						  _MM_FROUND_CUR_DIRECTION);
4688 }
4689 
4690 extern __inline __m512
4691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692 _mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C)
4693 {
4694   return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B,
4695 						  _MM_FROUND_CUR_DIRECTION);
4696 }
4697 
4698 extern __inline __m512
4699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4700 _mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B)
4701 {
4702   return __builtin_ia32_vcvtph2psx512_mask_round (__B,
4703 						  _mm512_setzero_ps (),
4704 						  __A,
4705 						  _MM_FROUND_CUR_DIRECTION);
4706 }
4707 
4708 #ifdef __OPTIMIZE__
4709 extern __inline __m512
4710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4711 _mm512_cvtx_roundph_ps (__m256h __A, int __B)
4712 {
4713   return __builtin_ia32_vcvtph2psx512_mask_round (__A,
4714 						  _mm512_setzero_ps (),
4715 						  (__mmask16) -1,
4716 						  __B);
4717 }
4718 
4719 extern __inline __m512
4720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4721 _mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D)
4722 {
4723   return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D);
4724 }
4725 
4726 extern __inline __m512
4727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4728 _mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C)
4729 {
4730   return __builtin_ia32_vcvtph2psx512_mask_round (__B,
4731 						  _mm512_setzero_ps (),
4732 						  __A,
4733 						  __C);
4734 }
4735 
4736 #else
4737 #define _mm512_cvtx_roundph_ps(A, B)					\
4738   (__builtin_ia32_vcvtph2psx512_mask_round ((A),			\
4739 					    _mm512_setzero_ps (),	\
4740 					    (__mmask16)-1,		\
4741 					    (B)))
4742 
4743 #define _mm512_mask_cvtx_roundph_ps(A, B, C, D)				\
4744   (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))
4745 
4746 #define _mm512_maskz_cvtx_roundph_ps(A, B, C)				\
4747   (__builtin_ia32_vcvtph2psx512_mask_round ((B),			\
4748 					    _mm512_setzero_ps (),	\
4749 					    (A),			\
4750 					    (C)))
4751 #endif /* __OPTIMIZE__ */
4752 
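/* Editorial usage sketch (not part of the original header): widening
   sixteen _Float16 values to single precision.  The "x" in the name
   distinguishes these from the older _mm512_cvtph_ps, which takes its
   half-precision input as an integer vector.  _mm256_set1_ph is assumed
   from earlier in this header.

     __m256h __h = _mm256_set1_ph (0.25f);
     __m512  __s = _mm512_cvtxph_ps (__h);  */
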
4753 /* Intrinsics vcvtps2ph.  */
4754 extern __inline __m256h
4755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4756 _mm512_cvtxps_ph (__m512 __A)
4757 {
4758   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
4759 						  _mm256_setzero_ph (),
4760 						  (__mmask16) -1,
4761 						  _MM_FROUND_CUR_DIRECTION);
4762 }
4763 
4764 extern __inline __m256h
4765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4766 _mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C)
4767 {
4768   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
4769 						  __A, __B,
4770 						  _MM_FROUND_CUR_DIRECTION);
4771 }
4772 
4773 extern __inline __m256h
4774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4775 _mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B)
4776 {
4777   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
4778 						  _mm256_setzero_ph (),
4779 						  __A,
4780 						  _MM_FROUND_CUR_DIRECTION);
4781 }
4782 
4783 #ifdef __OPTIMIZE__
4784 extern __inline __m256h
4785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4786 _mm512_cvtx_roundps_ph (__m512 __A, int __B)
4787 {
4788   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
4789 						  _mm256_setzero_ph (),
4790 						  (__mmask16) -1,
4791 						  __B);
4792 }
4793 
4794 extern __inline __m256h
4795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4796 _mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D)
4797 {
4798   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
4799 						  __A, __B, __D);
4800 }
4801 
4802 extern __inline __m256h
4803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4804 _mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C)
4805 {
4806   return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
4807 						  _mm256_setzero_ph (),
4808 						  __A, __C);
4809 }
4810 
4811 #else
4812 #define _mm512_cvtx_roundps_ph(A, B)				\
4813   (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A),	\
4814 					    _mm256_setzero_ph (),\
4815 					    (__mmask16)-1, (B)))
4816 
4817 #define _mm512_mask_cvtx_roundps_ph(A, B, C, D)			\
4818   (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C),	\
4819 					    (A), (B), (D)))
4820 
4821 #define _mm512_maskz_cvtx_roundps_ph(A, B, C)			\
4822   (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B),	\
4823 					    _mm256_setzero_ph (),\
4824 					    (A), (C)))
4825 #endif /* __OPTIMIZE__ */
4826 
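/* Editorial usage sketch (not part of the original header): narrowing
   sixteen floats to _Float16; the zero-masking round form shows how an
   explicit rounding mode is passed.  _mm512_set1_ps (AVX-512F) is assumed
   to be available via <immintrin.h>.

     __m512  __s  = _mm512_set1_ps (3.14159f);
     __m256h __h  = _mm512_cvtxps_ph (__s);
     __m256h __hz = _mm512_maskz_cvtx_roundps_ph (0x00ff, __s,
						  _MM_FROUND_TO_NEAREST_INT
						  | _MM_FROUND_NO_EXC);  */
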
4827 /* Intrinsics vcvtpd2ph.  */
4828 extern __inline __m128h
4829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4830 _mm512_cvtpd_ph (__m512d __A)
4831 {
4832   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
4833 						 _mm_setzero_ph (),
4834 						 (__mmask8) -1,
4835 						 _MM_FROUND_CUR_DIRECTION);
4836 }
4837 
4838 extern __inline __m128h
4839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4840 _mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C)
4841 {
4842   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
4843 						 __A, __B,
4844 						 _MM_FROUND_CUR_DIRECTION);
4845 }
4846 
4847 extern __inline __m128h
4848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849 _mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B)
4850 {
4851   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
4852 						 _mm_setzero_ph (),
4853 						 __A,
4854 						 _MM_FROUND_CUR_DIRECTION);
4855 }
4856 
4857 #ifdef __OPTIMIZE__
4858 extern __inline __m128h
4859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4860 _mm512_cvt_roundpd_ph (__m512d __A, int __B)
4861 {
4862   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
4863 						 _mm_setzero_ph (),
4864 						 (__mmask8) -1,
4865 						 __B);
4866 }
4867 
4868 extern __inline __m128h
4869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4870 _mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D)
4871 {
4872   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
4873 						 __A, __B, __D);
4874 }
4875 
4876 extern __inline __m128h
4877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4878 _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
4879 {
4880   return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
4881 						 _mm_setzero_ph (),
4882 						 __A, __C);
4883 }
4884 
4885 #else
4886 #define _mm512_cvt_roundpd_ph(A, B)				\
4887   (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A),		\
4888 					   _mm_setzero_ph (),	\
4889 					   (__mmask8)-1, (B)))
4890 
4891 #define _mm512_mask_cvt_roundpd_ph(A, B, C, D)			\
4892   (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C),		\
4893 					   (A), (B), (D)))
4894 
4895 #define _mm512_maskz_cvt_roundpd_ph(A, B, C)			\
4896   (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B),		\
4897 					   _mm_setzero_ph (),	\
4898 					   (A), (C)))
4899 
4900 #endif /* __OPTIMIZE__ */
4901 
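/* Editorial usage sketch (not part of the original header): narrowing eight
   doubles to eight _Float16 values packed into a 128-bit vector.
   _mm512_set1_pd (AVX-512F) is assumed to be available via <immintrin.h>.

     __m512d __d = _mm512_set1_pd (2.0);
     __m128h __h = _mm512_cvtpd_ph (__d);  */
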
4902 /* Intrinsics vcvtsh2ss, vcvtsh2sd.  */
4903 extern __inline __m128
4904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4905 _mm_cvtsh_ss (__m128 __A, __m128h __B)
4906 {
4907   return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
4908 					      _mm_setzero_ps (),
4909 					      (__mmask8) -1,
4910 					      _MM_FROUND_CUR_DIRECTION);
4911 }
4912 
4913 extern __inline __m128
4914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4915 _mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
4916 			 __m128h __D)
4917 {
4918   return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
4919 					      _MM_FROUND_CUR_DIRECTION);
4920 }
4921 
4922 extern __inline __m128
4923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4924 _mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
4925 			  __m128h __C)
4926 {
4927   return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
4928 					      _mm_setzero_ps (),
4929 					      __A, _MM_FROUND_CUR_DIRECTION);
4930 }
4931 
4932 extern __inline __m128d
4933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4934 _mm_cvtsh_sd (__m128d __A, __m128h __B)
4935 {
4936   return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
4937 					      _mm_setzero_pd (),
4938 					      (__mmask8) -1,
4939 					      _MM_FROUND_CUR_DIRECTION);
4940 }
4941 
4942 extern __inline __m128d
4943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4944 _mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
4945 			 __m128h __D)
4946 {
4947   return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
4948 					      _MM_FROUND_CUR_DIRECTION);
4949 }
4950 
4951 extern __inline __m128d
4952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4953 _mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
4954 {
4955   return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
4956 					      _mm_setzero_pd (),
4957 					      __A, _MM_FROUND_CUR_DIRECTION);
4958 }
4959 
4960 #ifdef __OPTIMIZE__
4961 extern __inline __m128
4962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4963 _mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
4964 {
4965   return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
4966 					      _mm_setzero_ps (),
4967 					      (__mmask8) -1, __R);
4968 }
4969 
4970 extern __inline __m128
4971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4972 _mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
4973 			 __m128h __D, const int __R)
4974 {
4975   return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
4976 }
4977 
4978 extern __inline __m128
4979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4980 _mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
4981 			  __m128h __C, const int __R)
4982 {
4983   return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
4984 					      _mm_setzero_ps (),
4985 					      __A, __R);
4986 }
4987 
4988 extern __inline __m128d
4989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4990 _mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
4991 {
4992   return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
4993 					      _mm_setzero_pd (),
4994 					      (__mmask8) -1, __R);
4995 }
4996 
4997 extern __inline __m128d
4998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4999 _mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
5000 			 __m128h __D, const int __R)
5001 {
5002   return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
5003 }
5004 
5005 extern __inline __m128d
5006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5007 _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
5008 {
5009   return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
5010 					      _mm_setzero_pd (),
5011 					      __A, __R);
5012 }
5013 
5014 #else
5015 #define _mm_cvt_roundsh_ss(A, B, R)				\
5016   (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A),		\
5017 					_mm_setzero_ps (),	\
5018 					(__mmask8) -1, (R)))
5019 
5020 #define _mm_mask_cvt_roundsh_ss(A, B, C, D, R)				\
5021   (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
5022 
5023 #define _mm_maskz_cvt_roundsh_ss(A, B, C, R)			\
5024   (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B),		\
5025 					_mm_setzero_ps (),	\
5026 					(A), (R)))
5027 
5028 #define _mm_cvt_roundsh_sd(A, B, R)				\
5029   (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A),		\
5030 					_mm_setzero_pd (),	\
5031 					(__mmask8) -1, (R)))
5032 
5033 #define _mm_mask_cvt_roundsh_sd(A, B, C, D, R)				\
5034   (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
5035 
5036 #define _mm_maskz_cvt_roundsh_sd(A, B, C, R)			\
5037   (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B),		\
5038 					_mm_setzero_pd (),	\
5039 					(A), (R)))
5040 
5041 #endif /* __OPTIMIZE__ */
5042 
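/* Editorial usage sketch (not part of the original header): the low
   _Float16 element of the last operand is widened into the low float or
   double element of the result; the remaining elements are copied from the
   first operand.  _mm_set_sh is assumed from earlier in this header.

     __m128  __f = _mm_cvtsh_ss (_mm_setzero_ps (), _mm_set_sh (1.25f));
     __m128d __d = _mm_cvtsh_sd (_mm_setzero_pd (), _mm_set_sh (1.25f));  */
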
5043 /* Intrinsics vcvtss2sh, vcvtsd2sh.  */
5044 extern __inline __m128h
5045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5046 _mm_cvtss_sh (__m128h __A, __m128 __B)
5047 {
5048   return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
5049 					      _mm_setzero_ph (),
5050 					      (__mmask8) -1,
5051 					      _MM_FROUND_CUR_DIRECTION);
5052 }
5053 
5054 extern __inline __m128h
5055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5056 _mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
5057 {
5058   return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
5059 					      _MM_FROUND_CUR_DIRECTION);
5060 }
5061 
5062 extern __inline __m128h
5063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5064 _mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
5065 {
5066   return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
5067 					      _mm_setzero_ph (),
5068 					      __A, _MM_FROUND_CUR_DIRECTION);
5069 }
5070 
5071 extern __inline __m128h
5072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073 _mm_cvtsd_sh (__m128h __A, __m128d __B)
5074 {
5075   return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
5076 					      _mm_setzero_ph (),
5077 					      (__mmask8) -1,
5078 					      _MM_FROUND_CUR_DIRECTION);
5079 }
5080 
5081 extern __inline __m128h
5082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083 _mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
5084 {
5085   return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
5086 					      _MM_FROUND_CUR_DIRECTION);
5087 }
5088 
5089 extern __inline __m128h
5090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5091 _mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
5092 {
5093   return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
5094 					      _mm_setzero_ph (),
5095 					      __A, _MM_FROUND_CUR_DIRECTION);
5096 }
5097 
5098 #ifdef __OPTIMIZE__
5099 extern __inline __m128h
5100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5101 _mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
5102 {
5103   return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
5104 					      _mm_setzero_ph (),
5105 					      (__mmask8) -1, __R);
5106 }
5107 
5108 extern __inline __m128h
5109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5110 _mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
5111 			 const int __R)
5112 {
5113   return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
5114 }
5115 
5116 extern __inline __m128h
5117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5118 _mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
5119 			  const int __R)
5120 {
5121   return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
5122 					      _mm_setzero_ph (),
5123 					      __A, __R);
5124 }
5125 
5126 extern __inline __m128h
5127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5128 _mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
5129 {
5130   return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
5131 					      _mm_setzero_ph (),
5132 					      (__mmask8) -1, __R);
5133 }
5134 
5135 extern __inline __m128h
5136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5137 _mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
5138 			 const int __R)
5139 {
5140   return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
5141 }
5142 
5143 extern __inline __m128h
5144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5145 _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
5146 			  const int __R)
5147 {
5148   return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
5149 					      _mm_setzero_ph (),
5150 					      __A, __R);
5151 }
5152 
5153 #else
5154 #define _mm_cvt_roundss_sh(A, B, R)				\
5155   (__builtin_ia32_vcvtss2sh_mask_round ((B), (A),		\
5156 					_mm_setzero_ph (),	\
5157 					(__mmask8) -1, (R)))
5158 
5159 #define _mm_mask_cvt_roundss_sh(A, B, C, D, R)				\
5160   (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
5161 
5162 #define _mm_maskz_cvt_roundss_sh(A, B, C, R)			\
5163   (__builtin_ia32_vcvtss2sh_mask_round ((C), (B),		\
5164 					_mm_setzero_ph (),	\
5165 					(A), (R)))
5166 
5167 #define _mm_cvt_roundsd_sh(A, B, R)				\
5168   (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A),		\
5169 					_mm_setzero_ph (),	\
5170 					(__mmask8) -1, (R)))
5171 
5172 #define _mm_mask_cvt_roundsd_sh(A, B, C, D, R)				\
5173   (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
5174 
5175 #define _mm_maskz_cvt_roundsd_sh(A, B, C, R)			\
5176   (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B),		\
5177 					_mm_setzero_ph (),	\
5178 					(A), (R)))
5179 
5180 #endif /* __OPTIMIZE__ */
5181 
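/* Editorial usage sketch (not part of the original header): the opposite
   direction, narrowing the low float or double of the last operand into
   the low _Float16 of the result, with the upper elements taken from the
   first operand.

     __m128h __dst = _mm_setzero_ph ();
     __m128h __a = _mm_cvtss_sh (__dst, _mm_set_ss (0.5f));
     __m128h __b = _mm_cvtsd_sh (__dst, _mm_set_sd (0.5));  */
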
5182 /* Intrinsics vfmaddsub[132,213,231]ph.  */
5183 extern __inline __m512h
5184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5185 _mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C)
5186 {
5187   return (__m512h)
5188     __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5189 					(__v32hf) __B,
5190 					(__v32hf) __C,
5191 					(__mmask32) -1,
5192 					_MM_FROUND_CUR_DIRECTION);
5193 }
5194 
5195 extern __inline __m512h
5196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197 _mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5198 {
5199   return (__m512h)
5200     __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5201 					(__v32hf) __B,
5202 					(__v32hf) __C,
5203 					(__mmask32) __U,
5204 					_MM_FROUND_CUR_DIRECTION);
5205 }
5206 
5207 extern __inline __m512h
5208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5209 _mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5210 {
5211   return (__m512h)
5212     __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
5213 					 (__v32hf) __B,
5214 					 (__v32hf) __C,
5215 					 (__mmask32) __U,
5216 					 _MM_FROUND_CUR_DIRECTION);
5217 }
5218 
5219 extern __inline __m512h
5220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5221 _mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5222 {
5223   return (__m512h)
5224     __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
5225 					 (__v32hf) __B,
5226 					 (__v32hf) __C,
5227 					 (__mmask32) __U,
5228 					 _MM_FROUND_CUR_DIRECTION);
5229 }
5230 
5231 #ifdef __OPTIMIZE__
5232 extern __inline __m512h
5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234 _mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5235 {
5236   return (__m512h)
5237     __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5238 					(__v32hf) __B,
5239 					(__v32hf) __C,
5240 					(__mmask32) -1, __R);
5241 }
5242 
5243 extern __inline __m512h
5244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5245 _mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5246 			       __m512h __C, const int __R)
5247 {
5248   return (__m512h)
5249     __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5250 					(__v32hf) __B,
5251 					(__v32hf) __C,
5252 					(__mmask32) __U, __R);
5253 }
5254 
5255 extern __inline __m512h
5256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5257 _mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
5258 				__mmask32 __U, const int __R)
5259 {
5260   return (__m512h)
5261     __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
5262 					 (__v32hf) __B,
5263 					 (__v32hf) __C,
5264 					 (__mmask32) __U, __R);
5265 }
5266 
5267 extern __inline __m512h
5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269 _mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5270 				__m512h __C, const int __R)
5271 {
5272   return (__m512h)
5273     __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
5274 					 (__v32hf) __B,
5275 					 (__v32hf) __C,
5276 					 (__mmask32) __U, __R);
5277 }
5278 
5279 #else
5280 #define _mm512_fmaddsub_round_ph(A, B, C, R)				\
5281   ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))
5282 
5283 #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R)			\
5284   ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))
5285 
5286 #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R)			\
5287   ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))
5288 
5289 #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R)			\
5290   ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))
5291 
5292 #endif /* __OPTIMIZE__ */
5293 
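/* Editorial usage sketch (not part of the original header): fmaddsub
   alternates per lane, computing __A * __B - __C in even _Float16 lanes and
   __A * __B + __C in odd lanes, a common building block for complex
   arithmetic.  _mm512_set1_ph is assumed from earlier in this header.

     __m512h __a = _mm512_set1_ph (2.0f);
     __m512h __b = _mm512_set1_ph (3.0f);
     __m512h __c = _mm512_set1_ph (1.0f);
     __m512h __r = _mm512_fmaddsub_ph (__a, __b, __c);   5.0 and 7.0 alternate  */
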
5294 /* Intrinsics vfmsubadd[132,213,231]ph.  */
5295 extern __inline __m512h
5296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5297   _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C)
5298 {
5299   return (__m512h)
5300     __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5301 					(__v32hf) __B,
5302 					(__v32hf) __C,
5303 					(__mmask32) -1,
5304 					_MM_FROUND_CUR_DIRECTION);
5305 }
5306 
5307 extern __inline __m512h
5308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5309 _mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U,
5310 			 __m512h __B, __m512h __C)
5311 {
5312   return (__m512h)
5313     __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5314 					(__v32hf) __B,
5315 					(__v32hf) __C,
5316 					(__mmask32) __U,
5317 					_MM_FROUND_CUR_DIRECTION);
5318 }
5319 
5320 extern __inline __m512h
5321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5322 _mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B,
5323 			  __m512h __C, __mmask32 __U)
5324 {
5325   return (__m512h)
5326     __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
5327 					 (__v32hf) __B,
5328 					 (__v32hf) __C,
5329 					 (__mmask32) __U,
5330 					 _MM_FROUND_CUR_DIRECTION);
5331 }
5332 
5333 extern __inline __m512h
5334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5335 _mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A,
5336 			  __m512h __B, __m512h __C)
5337 {
5338   return (__m512h)
5339     __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
5340 					 (__v32hf) __B,
5341 					 (__v32hf) __C,
5342 					 (__mmask32) __U,
5343 					 _MM_FROUND_CUR_DIRECTION);
5344 }
5345 
5346 #ifdef __OPTIMIZE__
5347 extern __inline __m512h
5348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5349 _mm512_fmsubadd_round_ph (__m512h __A, __m512h __B,
5350 			  __m512h __C, const int __R)
5351 {
5352   return (__m512h)
5353     __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5354 					(__v32hf) __B,
5355 					(__v32hf) __C,
5356 					(__mmask32) -1, __R);
5357 }
5358 
5359 extern __inline __m512h
5360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5361 _mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5362 			       __m512h __C, const int __R)
5363 {
5364   return (__m512h)
5365     __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5366 					(__v32hf) __B,
5367 					(__v32hf) __C,
5368 					(__mmask32) __U, __R);
5369 }
5370 
5371 extern __inline __m512h
5372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5373 _mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
5374 				__mmask32 __U, const int __R)
5375 {
5376   return (__m512h)
5377     __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
5378 					 (__v32hf) __B,
5379 					 (__v32hf) __C,
5380 					 (__mmask32) __U, __R);
5381 }
5382 
5383 extern __inline __m512h
5384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5385 _mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5386 				__m512h __C, const int __R)
5387 {
5388   return (__m512h)
5389     __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
5390 					 (__v32hf) __B,
5391 					 (__v32hf) __C,
5392 					 (__mmask32) __U, __R);
5393 }
5394 
5395 #else
5396 #define _mm512_fmsubadd_round_ph(A, B, C, R)				\
5397   ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))
5398 
5399 #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R)			\
5400   ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))
5401 
5402 #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R)			\
5403   ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))
5404 
5405 #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R)			\
5406   ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))
5407 
5408 #endif /* __OPTIMIZE__ */
5409 
5410 /* Intrinsics vfmadd[132,213,231]ph.  */
5411 extern __inline __m512h
5412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5413   _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C)
5414 {
5415   return (__m512h)
5416     __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5417 				     (__v32hf) __B,
5418 				     (__v32hf) __C,
5419 				     (__mmask32) -1,
5420 				     _MM_FROUND_CUR_DIRECTION);
5421 }
5422 
5423 extern __inline __m512h
5424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425 _mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5426 {
5427   return (__m512h)
5428     __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5429 				     (__v32hf) __B,
5430 				     (__v32hf) __C,
5431 				     (__mmask32) __U,
5432 				     _MM_FROUND_CUR_DIRECTION);
5433 }
5434 
5435 extern __inline __m512h
5436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5437 _mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5438 {
5439   return (__m512h)
5440     __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
5441 				      (__v32hf) __B,
5442 				      (__v32hf) __C,
5443 				      (__mmask32) __U,
5444 				      _MM_FROUND_CUR_DIRECTION);
5445 }
5446 
5447 extern __inline __m512h
5448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449 _mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5450 {
5451   return (__m512h)
5452     __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
5453 				      (__v32hf) __B,
5454 				      (__v32hf) __C,
5455 				      (__mmask32) __U,
5456 				      _MM_FROUND_CUR_DIRECTION);
5457 }
5458 
5459 #ifdef __OPTIMIZE__
5460 extern __inline __m512h
5461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5462 _mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5463 {
5464   return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5465 						       (__v32hf) __B,
5466 						       (__v32hf) __C,
5467 						       (__mmask32) -1, __R);
5468 }
5469 
5470 extern __inline __m512h
5471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5472 _mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5473 			       __m512h __C, const int __R)
5474 {
5475   return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5476 						       (__v32hf) __B,
5477 						       (__v32hf) __C,
5478 						       (__mmask32) __U, __R);
5479 }
5480 
5481 extern __inline __m512h
5482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5483 _mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
5484 				__mmask32 __U, const int __R)
5485 {
5486   return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
5487 							(__v32hf) __B,
5488 							(__v32hf) __C,
5489 							(__mmask32) __U, __R);
5490 }
5491 
5492 extern __inline __m512h
5493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5494 _mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5495 				__m512h __C, const int __R)
5496 {
5497   return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
5498 							(__v32hf) __B,
5499 							(__v32hf) __C,
5500 							(__mmask32) __U, __R);
5501 }
5502 
5503 #else
5504 #define _mm512_fmadd_round_ph(A, B, C, R)				\
5505   ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))
5506 
5507 #define _mm512_mask_fmadd_round_ph(A, U, B, C, R)			\
5508   ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))
5509 
5510 #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R)			\
5511   ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))
5512 
5513 #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R)			\
5514   ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))
5515 
5516 #endif /* __OPTIMIZE__ */
5517 
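/* Editorial usage sketch (not part of the original header): plain and
   merge-masked fused multiply-add.  In the masked form, lanes whose mask
   bit is clear keep the corresponding value of __A.  _mm512_set1_ph is
   assumed from earlier in this header.

     __m512h   __a = _mm512_set1_ph (1.0f);
     __m512h   __b = _mm512_set1_ph (2.0f);
     __m512h   __c = _mm512_set1_ph (3.0f);
     __mmask32 __m = 0xffff0000u;
     __m512h __r0 = _mm512_fmadd_ph (__a, __b, __c);
     __m512h __r1 = _mm512_mask_fmadd_ph (__a, __m, __b, __c);  */
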
5518 /* Intrinsics vfnmadd[132,213,231]ph.  */
5519 extern __inline __m512h
5520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5521 _mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C)
5522 {
5523   return (__m512h)
5524     __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5525 				      (__v32hf) __B,
5526 				      (__v32hf) __C,
5527 				      (__mmask32) -1,
5528 				      _MM_FROUND_CUR_DIRECTION);
5529 }
5530 
5531 extern __inline __m512h
5532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5533 _mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5534 {
5535   return (__m512h)
5536     __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5537 				      (__v32hf) __B,
5538 				      (__v32hf) __C,
5539 				      (__mmask32) __U,
5540 				      _MM_FROUND_CUR_DIRECTION);
5541 }
5542 
5543 extern __inline __m512h
5544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5545 _mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5546 {
5547   return (__m512h)
5548     __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
5549 				       (__v32hf) __B,
5550 				       (__v32hf) __C,
5551 				       (__mmask32) __U,
5552 				       _MM_FROUND_CUR_DIRECTION);
5553 }
5554 
5555 extern __inline __m512h
5556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5557 _mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5558 {
5559   return (__m512h)
5560     __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
5561 				       (__v32hf) __B,
5562 				       (__v32hf) __C,
5563 				       (__mmask32) __U,
5564 				       _MM_FROUND_CUR_DIRECTION);
5565 }
5566 
5567 #ifdef __OPTIMIZE__
5568 extern __inline __m512h
5569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5570 _mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5571 {
5572   return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5573 						       (__v32hf) __B,
5574 						       (__v32hf) __C,
5575 						       (__mmask32) -1, __R);
5576 }
5577 
5578 extern __inline __m512h
5579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5580 _mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5581 			       __m512h __C, const int __R)
5582 {
5583   return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5584 						       (__v32hf) __B,
5585 						       (__v32hf) __C,
5586 						       (__mmask32) __U, __R);
5587 }
5588 
5589 extern __inline __m512h
5590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5591 _mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
5592 				__mmask32 __U, const int __R)
5593 {
5594   return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
5595 							(__v32hf) __B,
5596 							(__v32hf) __C,
5597 							(__mmask32) __U, __R);
5598 }
5599 
5600 extern __inline __m512h
5601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602 _mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5603 				__m512h __C, const int __R)
5604 {
5605   return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
5606 							(__v32hf) __B,
5607 							(__v32hf) __C,
5608 							(__mmask32) __U, __R);
5609 }
5610 
5611 #else
5612 #define _mm512_fnmadd_round_ph(A, B, C, R)				\
5613   ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))
5614 
5615 #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R)			\
5616   ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))
5617 
5618 #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R)			\
5619   ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))
5620 
5621 #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R)			\
5622   ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))
5623 
5624 #endif /* __OPTIMIZE__ */
5625 
5626 /* Intrinsics vfmsub[132,213,231]ph.  */
5627 extern __inline __m512h
5628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629 _mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C)
5630 {
5631   return (__m512h)
5632     __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5633 				     (__v32hf) __B,
5634 				     (__v32hf) __C,
5635 				     (__mmask32) -1,
5636 				     _MM_FROUND_CUR_DIRECTION);
5637 }
5638 
5639 extern __inline __m512h
5640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5641 _mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5642 {
5643   return (__m512h)
5644     __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5645 				     (__v32hf) __B,
5646 				     (__v32hf) __C,
5647 				     (__mmask32) __U,
5648 				     _MM_FROUND_CUR_DIRECTION);
5649 }
5650 
5651 extern __inline __m512h
5652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5653 _mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5654 {
5655   return (__m512h)
5656     __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
5657 				      (__v32hf) __B,
5658 				      (__v32hf) __C,
5659 				      (__mmask32) __U,
5660 				      _MM_FROUND_CUR_DIRECTION);
5661 }
5662 
5663 extern __inline __m512h
5664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5665 _mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5666 {
5667   return (__m512h)
5668     __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
5669 				      (__v32hf) __B,
5670 				      (__v32hf) __C,
5671 				      (__mmask32) __U,
5672 				      _MM_FROUND_CUR_DIRECTION);
5673 }
5674 
5675 #ifdef __OPTIMIZE__
5676 extern __inline __m512h
5677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678 _mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5679 {
5680   return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5681 						       (__v32hf) __B,
5682 						       (__v32hf) __C,
5683 						       (__mmask32) -1, __R);
5684 }
5685 
5686 extern __inline __m512h
5687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5688 _mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5689 			       __m512h __C, const int __R)
5690 {
5691   return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5692 						       (__v32hf) __B,
5693 						       (__v32hf) __C,
5694 						       (__mmask32) __U, __R);
5695 }
5696 
5697 extern __inline __m512h
5698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5699 _mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
5700 				__mmask32 __U, const int __R)
5701 {
5702   return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
5703 							(__v32hf) __B,
5704 							(__v32hf) __C,
5705 							(__mmask32) __U, __R);
5706 }
5707 
5708 extern __inline __m512h
5709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710 _mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5711 				__m512h __C, const int __R)
5712 {
5713   return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
5714 							(__v32hf) __B,
5715 							(__v32hf) __C,
5716 							(__mmask32) __U, __R);
5717 }
5718 
5719 #else
5720 #define _mm512_fmsub_round_ph(A, B, C, R)				\
5721   ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))
5722 
5723 #define _mm512_mask_fmsub_round_ph(A, U, B, C, R)			\
5724   ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))
5725 
5726 #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R)			\
5727   ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))
5728 
5729 #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R)			\
5730   ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))
5731 
5732 #endif /* __OPTIMIZE__ */
5733 
5734 /* Intrinsics vfnmsub[132,213,231]ph.  */
5735 extern __inline __m512h
5736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5737 _mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C)
5738 {
5739   return (__m512h)
5740     __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5741 				      (__v32hf) __B,
5742 				      (__v32hf) __C,
5743 				      (__mmask32) -1,
5744 				      _MM_FROUND_CUR_DIRECTION);
5745 }
5746 
5747 extern __inline __m512h
5748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5749 _mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5750 {
5751   return (__m512h)
5752     __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5753 				      (__v32hf) __B,
5754 				      (__v32hf) __C,
5755 				      (__mmask32) __U,
5756 				      _MM_FROUND_CUR_DIRECTION);
5757 }
5758 
5759 extern __inline __m512h
5760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5761 _mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5762 {
5763   return (__m512h)
5764     __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
5765 				       (__v32hf) __B,
5766 				       (__v32hf) __C,
5767 				       (__mmask32) __U,
5768 				       _MM_FROUND_CUR_DIRECTION);
5769 }
5770 
5771 extern __inline __m512h
5772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5773 _mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5774 {
5775   return (__m512h)
5776     __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
5777 				       (__v32hf) __B,
5778 				       (__v32hf) __C,
5779 				       (__mmask32) __U,
5780 				       _MM_FROUND_CUR_DIRECTION);
5781 }
5782 
5783 #ifdef __OPTIMIZE__
5784 extern __inline __m512h
5785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5786 _mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5787 {
5788   return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5789 						       (__v32hf) __B,
5790 						       (__v32hf) __C,
5791 						       (__mmask32) -1, __R);
5792 }
5793 
5794 extern __inline __m512h
5795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5796 _mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5797 			       __m512h __C, const int __R)
5798 {
5799   return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5800 						       (__v32hf) __B,
5801 						       (__v32hf) __C,
5802 						       (__mmask32) __U, __R);
5803 }
5804 
5805 extern __inline __m512h
5806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5807 _mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
5808 				__mmask32 __U, const int __R)
5809 {
5810   return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
5811 							(__v32hf) __B,
5812 							(__v32hf) __C,
5813 							(__mmask32) __U, __R);
5814 }
5815 
5816 extern __inline __m512h
5817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818 _mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5819 				__m512h __C, const int __R)
5820 {
5821   return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
5822 							(__v32hf) __B,
5823 							(__v32hf) __C,
5824 							(__mmask32) __U, __R);
5825 }
5826 
5827 #else
5828 #define _mm512_fnmsub_round_ph(A, B, C, R)				\
5829   ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))
5830 
5831 #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R)			\
5832   ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))
5833 
5834 #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R)			\
5835   ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))
5836 
5837 #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R)			\
5838   ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))
5839 
5840 #endif /* __OPTIMIZE__ */
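
/* Usage sketch (editorial, not part of the upstream header): the
   _mm512_*fnmsub_ph variants compute -(__A * __B) - __C in each of the
   32 _Float16 lanes; the mask_, mask3_ and maskz_ forms leave lanes
   whose mask bit is clear equal to __A, to __C, or to zero,
   respectively.  Assuming _mm512_set1_ph from earlier in this header:

     __m512h a = _mm512_set1_ph ((_Float16) 1.5f);
     __m512h b = _mm512_set1_ph ((_Float16) 2.0f);
     __m512h c = _mm512_set1_ph ((_Float16) 0.5f);
     __m512h r = _mm512_fnmsub_ph (a, b, c);

   here every lane of r holds -(1.5 * 2.0) - 0.5 = -3.5.  */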
5841 
5842 /* Intrinsics vfmadd[132,213,231]sh.  */
5843 extern __inline __m128h
5844   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845 _mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
5846 {
5847   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5848 						  (__v8hf) __A,
5849 						  (__v8hf) __B,
5850 						  (__mmask8) -1,
5851 						  _MM_FROUND_CUR_DIRECTION);
5852 }
5853 
5854 extern __inline __m128h
5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856 _mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
5857 {
5858   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5859 						  (__v8hf) __A,
5860 						  (__v8hf) __B,
5861 						  (__mmask8) __U,
5862 						  _MM_FROUND_CUR_DIRECTION);
5863 }
5864 
5865 extern __inline __m128h
5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867 _mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
5868 {
5869   return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
5870 						   (__v8hf) __A,
5871 						   (__v8hf) __B,
5872 						   (__mmask8) __U,
5873 						   _MM_FROUND_CUR_DIRECTION);
5874 }
5875 
5876 extern __inline __m128h
5877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5878 _mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
5879 {
5880   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
5881 						   (__v8hf) __A,
5882 						   (__v8hf) __B,
5883 						   (__mmask8) __U,
5884 						   _MM_FROUND_CUR_DIRECTION);
5885 }
5886 
5887 
5888 #ifdef __OPTIMIZE__
5889 extern __inline __m128h
5890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5891 _mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
5892 {
5893   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5894 						  (__v8hf) __A,
5895 						  (__v8hf) __B,
5896 						  (__mmask8) -1,
5897 						  __R);
5898 }
5899 
5900 extern __inline __m128h
5901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5902 _mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
5903 			 const int __R)
5904 {
5905   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5906 						  (__v8hf) __A,
5907 						  (__v8hf) __B,
5908 						  (__mmask8) __U, __R);
5909 }
5910 
5911 extern __inline __m128h
5912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5913 _mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
5914 			  const int __R)
5915 {
5916   return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
5917 						   (__v8hf) __A,
5918 						   (__v8hf) __B,
5919 						   (__mmask8) __U, __R);
5920 }
5921 
5922 extern __inline __m128h
5923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5924 _mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
5925 			  __m128h __B, const int __R)
5926 {
5927   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
5928 						   (__v8hf) __A,
5929 						   (__v8hf) __B,
5930 						   (__mmask8) __U, __R);
5931 }
5932 
5933 #else
5934 #define _mm_fmadd_round_sh(A, B, C, R)					\
5935   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
5936 #define _mm_mask_fmadd_round_sh(A, U, B, C, R)				\
5937   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
5938 #define _mm_mask3_fmadd_round_sh(A, B, C, U, R)				\
5939   ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
5940 #define _mm_maskz_fmadd_round_sh(U, A, B, C, R)				\
5941   ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
5942 
5943 #endif /* __OPTIMIZE__ */
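
/* Usage sketch (editorial, not part of the upstream header): the scalar
   _sh forms only compute element 0; _mm_fmadd_sh (__W, __A, __B) returns
   __W[0] * __A[0] + __B[0] in element 0, with the remaining elements
   taken from __W (or from __B for the mask3_ form, following the usual
   AVX-512 scalar conventions).  When mask bit 0 is clear, the mask_,
   mask3_ and maskz_ forms fall back to __W[0], __B[0] and zero.  GCC's
   vector subscripting (already used by this header) makes the behaviour
   easy to check:

     __m128h w = { (_Float16) 2.0f, 0, 0, 0, 0, 0, 0, 0 };
     __m128h a = { (_Float16) 3.0f, 0, 0, 0, 0, 0, 0, 0 };
     __m128h b = { (_Float16) 1.0f, 0, 0, 0, 0, 0, 0, 0 };
     __m128h r = _mm_fmadd_sh (w, a, b);

   here r[0] is 7.0 and r[1..7] are copied from w.  */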
5944 
5945 /* Intrinsics vfnmadd[132,213,231]sh.  */
5946 extern __inline __m128h
5947   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5948 _mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
5949 {
5950   return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
5951 						   (__v8hf) __A,
5952 						   (__v8hf) __B,
5953 						   (__mmask8) -1,
5954 						   _MM_FROUND_CUR_DIRECTION);
5955 }
5956 
5957 extern __inline __m128h
5958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5959 _mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
5960 {
5961   return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
5962 						  (__v8hf) __A,
5963 						  (__v8hf) __B,
5964 						  (__mmask8) __U,
5965 						  _MM_FROUND_CUR_DIRECTION);
5966 }
5967 
5968 extern __inline __m128h
5969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5970 _mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
5971 {
5972   return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
5973 						   (__v8hf) __A,
5974 						   (__v8hf) __B,
5975 						   (__mmask8) __U,
5976 						   _MM_FROUND_CUR_DIRECTION);
5977 }
5978 
5979 extern __inline __m128h
5980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5981 _mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
5982 {
5983   return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
5984 						   (__v8hf) __A,
5985 						   (__v8hf) __B,
5986 						   (__mmask8) __U,
5987 						   _MM_FROUND_CUR_DIRECTION);
5988 }
5989 
5990 
5991 #ifdef __OPTIMIZE__
5992 extern __inline __m128h
5993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5994 _mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
5995 {
5996   return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
5997 						   (__v8hf) __A,
5998 						   (__v8hf) __B,
5999 						   (__mmask8) -1,
6000 						   __R);
6001 }
6002 
6003 extern __inline __m128h
6004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6005 _mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
6006 			 const int __R)
6007 {
6008   return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
6009 						  (__v8hf) __A,
6010 						  (__v8hf) __B,
6011 						  (__mmask8) __U, __R);
6012 }
6013 
6014 extern __inline __m128h
6015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6016 _mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
6017 			  const int __R)
6018 {
6019   return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
6020 						   (__v8hf) __A,
6021 						   (__v8hf) __B,
6022 						   (__mmask8) __U, __R);
6023 }
6024 
6025 extern __inline __m128h
6026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027 _mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
6028 			  __m128h __B, const int __R)
6029 {
6030   return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
6031 						   (__v8hf) __A,
6032 						   (__v8hf) __B,
6033 						   (__mmask8) __U, __R);
6034 }
6035 
6036 #else
6037 #define _mm_fnmadd_round_sh(A, B, C, R)					\
6038   ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
6039 #define _mm_mask_fnmadd_round_sh(A, U, B, C, R)				\
6040   ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
6041 #define _mm_mask3_fnmadd_round_sh(A, B, C, U, R)			\
6042   ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
6043 #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R)			\
6044   ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
6045 
6046 #endif /* __OPTIMIZE__ */
6047 
6048 /* Intrinsics vfmsub[132,213,231]sh.  */
6049 extern __inline __m128h
6050   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6051 _mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
6052 {
6053   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6054 						  (__v8hf) __A,
6055 						  -(__v8hf) __B,
6056 						  (__mmask8) -1,
6057 						  _MM_FROUND_CUR_DIRECTION);
6058 }
6059 
6060 extern __inline __m128h
6061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6062 _mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
6063 {
6064   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6065 						  (__v8hf) __A,
6066 						  -(__v8hf) __B,
6067 						  (__mmask8) __U,
6068 						  _MM_FROUND_CUR_DIRECTION);
6069 }
6070 
6071 extern __inline __m128h
6072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073 _mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
6074 {
6075   return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6076 						   (__v8hf) __A,
6077 						   (__v8hf) __B,
6078 						   (__mmask8) __U,
6079 						   _MM_FROUND_CUR_DIRECTION);
6080 }
6081 
6082 extern __inline __m128h
6083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6084 _mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
6085 {
6086   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6087 						   (__v8hf) __A,
6088 						   -(__v8hf) __B,
6089 						   (__mmask8) __U,
6090 						   _MM_FROUND_CUR_DIRECTION);
6091 }
6092 
6093 
6094 #ifdef __OPTIMIZE__
6095 extern __inline __m128h
6096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6097 _mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
6098 {
6099   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6100 						  (__v8hf) __A,
6101 						  -(__v8hf) __B,
6102 						  (__mmask8) -1,
6103 						  __R);
6104 }
6105 
6106 extern __inline __m128h
6107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6108 _mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
6109 			 const int __R)
6110 {
6111   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6112 						  (__v8hf) __A,
6113 						  -(__v8hf) __B,
6114 						  (__mmask8) __U, __R);
6115 }
6116 
6117 extern __inline __m128h
6118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6119 _mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
6120 			  const int __R)
6121 {
6122   return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6123 						   (__v8hf) __A,
6124 						   (__v8hf) __B,
6125 						   (__mmask8) __U, __R);
6126 }
6127 
6128 extern __inline __m128h
6129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6130 _mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
6131 			  __m128h __B, const int __R)
6132 {
6133   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6134 						   (__v8hf) __A,
6135 						   -(__v8hf) __B,
6136 						   (__mmask8) __U, __R);
6137 }
6138 
6139 #else
6140 #define _mm_fmsub_round_sh(A, B, C, R)					\
6141   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
6142 #define _mm_mask_fmsub_round_sh(A, U, B, C, R)				\
6143   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
6144 #define _mm_mask3_fmsub_round_sh(A, B, C, U, R)				\
6145   ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
6146 #define _mm_maskz_fmsub_round_sh(U, A, B, C, R)				\
6147   ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
6148 
6149 #endif /* __OPTIMIZE__ */
6150 
6151 /* Intrinsics vfnmsub[132,213,231]sh.  */
6152 extern __inline __m128h
6153   __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6154 _mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
6155 {
6156   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6157 						  -(__v8hf) __A,
6158 						  -(__v8hf) __B,
6159 						  (__mmask8) -1,
6160 						  _MM_FROUND_CUR_DIRECTION);
6161 }
6162 
6163 extern __inline __m128h
6164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6165 _mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
6166 {
6167   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6168 						  -(__v8hf) __A,
6169 						  -(__v8hf) __B,
6170 						  (__mmask8) __U,
6171 						  _MM_FROUND_CUR_DIRECTION);
6172 }
6173 
6174 extern __inline __m128h
6175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6176 _mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
6177 {
6178   return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6179 						   -(__v8hf) __A,
6180 						   (__v8hf) __B,
6181 						   (__mmask8) __U,
6182 						   _MM_FROUND_CUR_DIRECTION);
6183 }
6184 
6185 extern __inline __m128h
6186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6187 _mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
6188 {
6189   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6190 						   -(__v8hf) __A,
6191 						   -(__v8hf) __B,
6192 						   (__mmask8) __U,
6193 						   _MM_FROUND_CUR_DIRECTION);
6194 }
6195 
6196 
6197 #ifdef __OPTIMIZE__
6198 extern __inline __m128h
6199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6200 _mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
6201 {
6202   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6203 						  -(__v8hf) __A,
6204 						  -(__v8hf) __B,
6205 						  (__mmask8) -1,
6206 						  __R);
6207 }
6208 
6209 extern __inline __m128h
6210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6211 _mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
6212 			 const int __R)
6213 {
6214   return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6215 						  -(__v8hf) __A,
6216 						  -(__v8hf) __B,
6217 						  (__mmask8) __U, __R);
6218 }
6219 
6220 extern __inline __m128h
6221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6222 _mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
6223 			  const int __R)
6224 {
6225   return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6226 						   -(__v8hf) __A,
6227 						   (__v8hf) __B,
6228 						   (__mmask8) __U, __R);
6229 }
6230 
6231 extern __inline __m128h
6232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6233 _mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
6234 			  __m128h __B, const int __R)
6235 {
6236   return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6237 						   -(__v8hf) __A,
6238 						   -(__v8hf) __B,
6239 						   (__mmask8) __U, __R);
6240 }
6241 
6242 #else
6243 #define _mm_fnmsub_round_sh(A, B, C, R)					\
6244   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
6245 #define _mm_mask_fnmsub_round_sh(A, U, B, C, R)				\
6246   ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
6247 #define _mm_mask3_fnmsub_round_sh(A, B, C, U, R)			\
6248   ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
6249 #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R)			\
6250   ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
6251 
6252 #endif /* __OPTIMIZE__ */
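
/* Editorial note (not part of the upstream header): _mm_fmsub_sh and
   _mm_fnmsub_sh above reuse the vfmaddsh3 builtins with negated
   operands, relying on the identities

     fmsub  (w, a, b) =   w*a - b  = fmadd (w,  a, -b)
     fnmsub (w, a, b) = -(w*a) - b = fmadd (w, -a, -b)

   (element 0 only); negating an operand is exact, so the single-rounding
   behaviour of the fused operation is unchanged.  */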
6253 
6254 /* Intrinsics vf[,c]maddcph.  */
6255 extern __inline __m512h
6256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6257 _mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C)
6258 {
6259   return (__m512h)
6260     __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
6261 					(__v32hf) __B,
6262 					(__v32hf) __C,
6263 					_MM_FROUND_CUR_DIRECTION);
6264 }
6265 
6266 extern __inline __m512h
6267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6268 _mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6269 {
6270   return (__m512h)
6271     __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
6272 					     (__v32hf) __C,
6273 					     (__v32hf) __D, __B,
6274 					     _MM_FROUND_CUR_DIRECTION);
6275 }
6276 
6277 extern __inline __m512h
6278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6279 _mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
6280 {
6281   return (__m512h)
6282     __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
6283 					      (__v32hf) __B,
6284 					      (__v32hf) __C,
6285 					      __D, _MM_FROUND_CUR_DIRECTION);
6286 }
6287 
6288 extern __inline __m512h
6289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290 _mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
6291 {
6292   return (__m512h)
6293     __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
6294 					      (__v32hf) __C,
6295 					      (__v32hf) __D,
6296 					      __A, _MM_FROUND_CUR_DIRECTION);
6297 }
6298 
6299 extern __inline __m512h
6300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6301 _mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C)
6302 {
6303   return (__m512h)
6304     __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
6305 				       (__v32hf) __B,
6306 				       (__v32hf) __C,
6307 				       _MM_FROUND_CUR_DIRECTION);
6308 }
6309 
6310 extern __inline __m512h
6311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6312 _mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6313 {
6314   return (__m512h)
6315     __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
6316 					    (__v32hf) __C,
6317 					    (__v32hf) __D, __B,
6318 					    _MM_FROUND_CUR_DIRECTION);
6319 }
6320 
6321 extern __inline __m512h
6322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6323 _mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
6324 {
6325   return (__m512h)
6326     __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
6327 					     (__v32hf) __B,
6328 					     (__v32hf) __C,
6329 					     __D, _MM_FROUND_CUR_DIRECTION);
6330 }
6331 
6332 extern __inline __m512h
6333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6334 _mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
6335 {
6336   return (__m512h)
6337     __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
6338 					     (__v32hf) __C,
6339 					     (__v32hf) __D,
6340 					     __A, _MM_FROUND_CUR_DIRECTION);
6341 }
6342 
6343 #ifdef __OPTIMIZE__
6344 extern __inline __m512h
6345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6346 _mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
6347 {
6348   return (__m512h)
6349     __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
6350 					(__v32hf) __B,
6351 					(__v32hf) __C,
6352 					__D);
6353 }
6354 
6355 extern __inline __m512h
6356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6357 _mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6358 			      __m512h __D, const int __E)
6359 {
6360   return (__m512h)
6361     __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
6362 					     (__v32hf) __C,
6363 					     (__v32hf) __D, __B,
6364 					     __E);
6365 }
6366 
6367 extern __inline __m512h
6368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6369 _mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
6370 			       __mmask16 __D, const int __E)
6371 {
6372   return (__m512h)
6373     __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
6374 					      (__v32hf) __B,
6375 					      (__v32hf) __C,
6376 					      __D, __E);
6377 }
6378 
6379 extern __inline __m512h
6380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6381 _mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
6382 			       __m512h __D, const int __E)
6383 {
6384   return (__m512h)
6385     __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
6386 					      (__v32hf) __C,
6387 					      (__v32hf) __D,
6388 					      __A, __E);
6389 }
6390 
6391 extern __inline __m512h
6392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6393 _mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
6394 {
6395   return (__m512h)
6396     __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
6397 				       (__v32hf) __B,
6398 				       (__v32hf) __C,
6399 				       __D);
6400 }
6401 
6402 extern __inline __m512h
6403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6404 _mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6405 			     __m512h __D, const int __E)
6406 {
6407   return (__m512h)
6408     __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
6409 					    (__v32hf) __C,
6410 					    (__v32hf) __D, __B,
6411 					    __E);
6412 }
6413 
6414 extern __inline __m512h
6415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6416 _mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
6417 			      __mmask16 __D, const int __E)
6418 {
6419   return (__m512h)
6420     __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
6421 					     (__v32hf) __B,
6422 					     (__v32hf) __C,
6423 					     __D, __E);
6424 }
6425 
6426 extern __inline __m512h
6427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6428 _mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
6429 			      __m512h __D, const int __E)
6430 {
6431   return (__m512h)
6432     __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
6433 					     (__v32hf) __C,
6434 					     (__v32hf) __D,
6435 					     __A, __E);
6436 }
6437 
6438 #else
6439 #define _mm512_fcmadd_round_pch(A, B, C, D)			\
6440   (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D))
6441 
6442 #define _mm512_mask_fcmadd_round_pch(A, B, C, D, E)			\
6443   ((__m512h) 								\
6444     __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A),		\
6445 					     (__v32hf) (C),		\
6446 					     (__v32hf) (D),		\
6447 					     (B), (E)))
6448 
6449 
6450 #define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E)			\
6451   ((__m512h)								\
6452    __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
6453 
6454 #define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E)			\
6455   (__m512h)								\
6456    __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E))
6457 
6458 #define _mm512_fmadd_round_pch(A, B, C, D)			\
6459   (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D))
6460 
6461 #define _mm512_mask_fmadd_round_pch(A, B, C, D, E)			\
6462   ((__m512h)								\
6463     __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A),		\
6464 					    (__v32hf) (C),		\
6465 					    (__v32hf) (D),		\
6466 					    (B), (E)))
6467 
6468 #define _mm512_mask3_fmadd_round_pch(A, B, C, D, E)			\
6469   (__m512h)								\
6470    __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E))
6471 
6472 #define _mm512_maskz_fmadd_round_pch(A, B, C, D, E)			\
6473   (__m512h)								\
6474    __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E))
6475 
6476 #endif /* __OPTIMIZE__ */
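
/* Editorial note (not part of the upstream header): the _pch intrinsics
   treat a __m512h as 16 complex numbers, each stored as a (real,
   imaginary) pair of _Float16 values, so their __mmask16 argument has
   one bit per complex element rather than per 16-bit lane.
   _mm512_fmadd_pch (__A, __B, __C) performs a complex multiply-add,

     (ar + i*ai)*(br + i*bi) + (cr + i*ci)
       = (ar*br - ai*bi + cr) + i*(ar*bi + ai*br + ci),

   while _mm512_fcmadd_pch uses the complex conjugate of the second
   operand (per the Intel documentation), i.e.

     (ar*br + ai*bi + cr) + i*(ai*br - ar*bi + ci).  */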
6477 
6478 /* Intrinsics vf[,c]mulcph.  */
6479 extern __inline __m512h
6480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6481 _mm512_fcmul_pch (__m512h __A, __m512h __B)
6482 {
6483   return (__m512h)
6484     __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
6485 				       (__v32hf) __B,
6486 				       _MM_FROUND_CUR_DIRECTION);
6487 }
6488 
6489 extern __inline __m512h
6490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6491 _mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6492 {
6493   return (__m512h)
6494     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
6495 					    (__v32hf) __D,
6496 					    (__v32hf) __A,
6497 					    __B, _MM_FROUND_CUR_DIRECTION);
6498 }
6499 
6500 extern __inline __m512h
6501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6502 _mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
6503 {
6504   return (__m512h)
6505     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
6506 					    (__v32hf) __C,
6507 					    _mm512_setzero_ph (),
6508 					    __A, _MM_FROUND_CUR_DIRECTION);
6509 }
6510 
6511 extern __inline __m512h
6512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6513 _mm512_fmul_pch (__m512h __A, __m512h __B)
6514 {
6515   return (__m512h)
6516     __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
6517 				      (__v32hf) __B,
6518 				      _MM_FROUND_CUR_DIRECTION);
6519 }
6520 
6521 extern __inline __m512h
6522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6523 _mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6524 {
6525   return (__m512h)
6526     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
6527 					   (__v32hf) __D,
6528 					   (__v32hf) __A,
6529 					   __B, _MM_FROUND_CUR_DIRECTION);
6530 }
6531 
6532 extern __inline __m512h
6533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6534 _mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
6535 {
6536   return (__m512h)
6537     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
6538 					   (__v32hf) __C,
6539 					   _mm512_setzero_ph (),
6540 					   __A, _MM_FROUND_CUR_DIRECTION);
6541 }
6542 
6543 #ifdef __OPTIMIZE__
6544 extern __inline __m512h
6545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6546 _mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D)
6547 {
6548   return (__m512h)
6549     __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
6550 				       (__v32hf) __B, __D);
6551 }
6552 
6553 extern __inline __m512h
6554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6555 _mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6556 			     __m512h __D, const int __E)
6557 {
6558   return (__m512h)
6559     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
6560 					    (__v32hf) __D,
6561 					    (__v32hf) __A,
6562 					    __B, __E);
6563 }
6564 
6565 extern __inline __m512h
6566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6567 _mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B,
6568 			      __m512h __C, const int __E)
6569 {
6570   return (__m512h)
6571     __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
6572 					    (__v32hf) __C,
6573 					    _mm512_setzero_ph (),
6574 					    __A, __E);
6575 }
6576 
6577 extern __inline __m512h
6578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6579 _mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D)
6580 {
6581   return (__m512h)
6582     __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
6583 				      (__v32hf) __B,
6584 				      __D);
6585 }
6586 
6587 extern __inline __m512h
6588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6589 _mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6590 			    __m512h __D, const int __E)
6591 {
6592   return (__m512h)
6593     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
6594 					   (__v32hf) __D,
6595 					   (__v32hf) __A,
6596 					   __B, __E);
6597 }
6598 
6599 extern __inline __m512h
6600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6601 _mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B,
6602 			     __m512h __C, const int __E)
6603 {
6604   return (__m512h)
6605     __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
6606 					   (__v32hf) __C,
6607 					   _mm512_setzero_ph (),
6608 					   __A, __E);
6609 }
6610 
6611 #else
6612 #define _mm512_fcmul_round_pch(A, B, D)				\
6613   (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D))
6614 
6615 #define _mm512_mask_fcmul_round_pch(A, B, C, D, E)			\
6616   (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E))
6617 
6618 #define _mm512_maskz_fcmul_round_pch(A, B, C, E)			\
6619   (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C),		\
6620 						    (__v32hf)		\
6621 						    _mm512_setzero_ph (), \
6622 						    (A), (E))
6623 
6624 #define _mm512_fmul_round_pch(A, B, D)			\
6625   (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D))
6626 
6627 #define _mm512_mask_fmul_round_pch(A, B, C, D, E)			  \
6628   (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E))
6629 
6630 #define _mm512_maskz_fmul_round_pch(A, B, C, E)				  \
6631   (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C),		  \
6632 						   (__v32hf)		  \
6633 						   _mm512_setzero_ph (),  \
6634 						   (A), (E))
6635 
6636 #endif /* __OPTIMIZE__ */
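
/* Usage sketch (editorial, not part of the upstream header):
   _mm512_fmul_pch (__A, __B) multiplies the 16 complex _Float16 pairs
   element-wise and _mm512_fcmul_pch multiplies by the conjugate of __B.
   In the mask_ forms the first argument supplies the complex elements
   whose mask bit is clear, and the maskz_ forms substitute zero (note
   the _mm512_setzero_ph () passed as the merge operand above).  For
   caller-supplied __m512h values x and y:

     __m512h prod = _mm512_maskz_fmul_pch (0x00ff, x, y);

   complex elements 0..7 of prod hold x*y, elements 8..15 are zero.  */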
6637 
6638 /* Intrinsics vf[,c]maddcsh.  */
6639 extern __inline __m128h
6640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6641 _mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6642 {
6643   return (__m128h)
6644     __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
6645 					  (__v8hf) __C,
6646 					  (__v8hf) __D, __B,
6647 					  _MM_FROUND_CUR_DIRECTION);
6648 }
6649 
6650 extern __inline __m128h
6651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6652 _mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
6653 {
6654   return (__m128h)
6655     __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
6656 					   (__v8hf) __B,
6657 					   (__v8hf) __C, __D,
6658 					   _MM_FROUND_CUR_DIRECTION);
6659 }
6660 
6661 extern __inline __m128h
6662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6663 _mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
6664 {
6665   return (__m128h)
6666     __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
6667 					   (__v8hf) __C,
6668 					   (__v8hf) __D,
6669 					   __A, _MM_FROUND_CUR_DIRECTION);
6670 }
6671 
6672 extern __inline __m128h
6673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6674 _mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
6675 {
6676   return (__m128h)
6677     __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
6678 				     (__v8hf) __B,
6679 				     (__v8hf) __C,
6680 				     _MM_FROUND_CUR_DIRECTION);
6681 }
6682 
6683 extern __inline __m128h
6684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6685 _mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6686 {
6687   return (__m128h)
6688     __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
6689 					 (__v8hf) __C,
6690 					 (__v8hf) __D, __B,
6691 					 _MM_FROUND_CUR_DIRECTION);
6692 }
6693 
6694 extern __inline __m128h
6695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6696 _mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
6697 {
6698   return (__m128h)
6699     __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
6700 					  (__v8hf) __B,
6701 					  (__v8hf) __C, __D,
6702 					  _MM_FROUND_CUR_DIRECTION);
6703 }
6704 
6705 extern __inline __m128h
6706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6707 _mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
6708 {
6709   return (__m128h)
6710     __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
6711 					  (__v8hf) __C,
6712 					  (__v8hf) __D,
6713 					  __A, _MM_FROUND_CUR_DIRECTION);
6714 }
6715 
6716 extern __inline __m128h
6717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6718 _mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
6719 {
6720   return (__m128h)
6721     __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
6722 				    (__v8hf) __B,
6723 				    (__v8hf) __C,
6724 				    _MM_FROUND_CUR_DIRECTION);
6725 }
6726 
6727 #ifdef __OPTIMIZE__
6728 extern __inline __m128h
6729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6730 _mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6731 			   __m128h __D, const int __E)
6732 {
6733   return (__m128h)
6734     __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
6735 					  (__v8hf) __C,
6736 					  (__v8hf) __D,
6737 					  __B, __E);
6738 }
6739 
6740 extern __inline __m128h
6741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6742 _mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
6743 			    __mmask8 __D, const int __E)
6744 {
6745   return (__m128h)
6746     __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
6747 					   (__v8hf) __B,
6748 					   (__v8hf) __C,
6749 					   __D, __E);
6750 }
6751 
6752 extern __inline __m128h
6753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6754 _mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
6755 			    __m128h __D, const int __E)
6756 {
6757   return (__m128h)
6758     __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
6759 					   (__v8hf) __C,
6760 					   (__v8hf) __D,
6761 					   __A, __E);
6762 }
6763 
6764 extern __inline __m128h
6765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6766 _mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
6767 {
6768   return (__m128h)
6769     __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
6770 				     (__v8hf) __B,
6771 				     (__v8hf) __C,
6772 				     __D);
6773 }
6774 
6775 extern __inline __m128h
6776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777 _mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6778 			  __m128h __D, const int __E)
6779 {
6780   return (__m128h)
6781     __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
6782 					 (__v8hf) __C,
6783 					 (__v8hf) __D,
6784 					 __B, __E);
6785 }
6786 
6787 extern __inline __m128h
6788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6789 _mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
6790 			   __mmask8 __D, const int __E)
6791 {
6792   return (__m128h)
6793     __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
6794 					  (__v8hf) __B,
6795 					  (__v8hf) __C,
6796 					  __D, __E);
6797 }
6798 
6799 extern __inline __m128h
6800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801 _mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
6802 			   __m128h __D, const int __E)
6803 {
6804   return (__m128h)
6805     __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
6806 					  (__v8hf) __C,
6807 					  (__v8hf) __D,
6808 					  __A, __E);
6809 }
6810 
6811 extern __inline __m128h
6812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6813 _mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
6814 {
6815   return (__m128h)
6816     __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
6817 				    (__v8hf) __B,
6818 				    (__v8hf) __C,
6819 				    __D);
6820 }
6821 #else
6822 #define _mm_mask_fcmadd_round_sch(A, B, C, D, E)			\
6823     ((__m128h)								\
6824      __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A),		\
6825 					   (__v8hf) (C),		\
6826 					   (__v8hf) (D),		\
6827 					   (B), (E)))
6828 
6829 
6830 #define _mm_mask3_fcmadd_round_sch(A, B, C, D, E)			\
6831   ((__m128h)								\
6832    __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A),		\
6833 					  (__v8hf) (B),		\
6834 					  (__v8hf) (C),		\
6835 					  (D), (E)))
6836 
6837 #define _mm_maskz_fcmadd_round_sch(A, B, C, D, E)		\
6838   ((__m128h) __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E)))
6839 
6840 #define _mm_fcmadd_round_sch(A, B, C, D)		\
6841   ((__m128h) __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D)))
6842 
6843 #define _mm_mask_fmadd_round_sch(A, B, C, D, E)				\
6844     ((__m128h)								\
6845      __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A),		\
6846 					  (__v8hf) (C),		\
6847 					  (__v8hf) (D),		\
6848 					  (B), (E)))
6849 
6850 #define _mm_mask3_fmadd_round_sch(A, B, C, D, E)			\
6851   ((__m128h)								\
6852    __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A),		\
6853 					 (__v8hf) (B),		\
6854 					 (__v8hf) (C),		\
6855 					 (D), (E)))
6856 
6857 #define _mm_maskz_fmadd_round_sch(A, B, C, D, E)		\
6858   ((__m128h) __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E)))
6859 
6860 #define _mm_fmadd_round_sch(A, B, C, D)		\
6861   ((__m128h) __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D)))
6862 
6863 #endif /* __OPTIMIZE__ */
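
/* Editorial note (not part of the upstream header): the _sch forms are
   the scalar counterparts of the _pch operations above and compute only
   the complex value held in the low 32 bits of their __m128h arguments;
   bit 0 of the __mmask8 decides whether that element is computed in the
   masked forms.  The plain _mm_fcmadd_sch and _mm_fmadd_sch simply wrap
   the corresponding round builtins with _MM_FROUND_CUR_DIRECTION,
   exactly as the packed versions do.  */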
6864 
6865 /* Intrinsics vf[,c]mulcsh.  */
6866 extern __inline __m128h
6867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6868 _mm_fcmul_sch (__m128h __A, __m128h __B)
6869 {
6870   return (__m128h)
6871     __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
6872 				    (__v8hf) __B,
6873 				    _MM_FROUND_CUR_DIRECTION);
6874 }
6875 
6876 extern __inline __m128h
6877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878 _mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6879 {
6880   return (__m128h)
6881     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
6882 					 (__v8hf) __D,
6883 					 (__v8hf) __A,
6884 					 __B, _MM_FROUND_CUR_DIRECTION);
6885 }
6886 
6887 extern __inline __m128h
6888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889 _mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
6890 {
6891   return (__m128h)
6892     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
6893 					 (__v8hf) __C,
6894 					 _mm_setzero_ph (),
6895 					 __A, _MM_FROUND_CUR_DIRECTION);
6896 }
6897 
6898 extern __inline __m128h
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 _mm_fmul_sch (__m128h __A, __m128h __B)
6901 {
6902   return (__m128h)
6903     __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
6904 				   (__v8hf) __B,
6905 				   _MM_FROUND_CUR_DIRECTION);
6906 }
6907 
6908 extern __inline __m128h
6909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6910 _mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6911 {
6912   return (__m128h)
6913     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
6914 					(__v8hf) __D,
6915 					(__v8hf) __A,
6916 					__B, _MM_FROUND_CUR_DIRECTION);
6917 }
6918 
6919 extern __inline __m128h
6920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6921 _mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
6922 {
6923   return (__m128h)
6924     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
6925 					(__v8hf) __C,
6926 					_mm_setzero_ph (),
6927 					__A, _MM_FROUND_CUR_DIRECTION);
6928 }
6929 
6930 #ifdef __OPTIMIZE__
6931 extern __inline __m128h
6932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933 _mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
6934 {
6935   return (__m128h)
6936     __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
6937 				    (__v8hf) __B,
6938 				    __D);
6939 }
6940 
6941 extern __inline __m128h
6942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6943 _mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6944 			  __m128h __D, const int __E)
6945 {
6946   return (__m128h)
6947     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
6948 					 (__v8hf) __D,
6949 					 (__v8hf) __A,
6950 					 __B, __E);
6951 }
6952 
6953 extern __inline __m128h
6954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955 _mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
6956 			   const int __E)
6957 {
6958   return (__m128h)
6959     __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
6960 					 (__v8hf) __C,
6961 					 _mm_setzero_ph (),
6962 					 __A, __E);
6963 }
6964 
6965 extern __inline __m128h
6966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6967 _mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
6968 {
6969   return (__m128h)
6970     __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
6971 				   (__v8hf) __B, __D);
6972 }
6973 
6974 extern __inline __m128h
6975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6976 _mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6977 			 __m128h __D, const int __E)
6978 {
6979   return (__m128h)
6980     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
6981 					(__v8hf) __D,
6982 					(__v8hf) __A,
6983 					__B, __E);
6984 }
6985 
6986 extern __inline __m128h
6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
6989 {
6990   return (__m128h)
6991     __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
6992 					(__v8hf) __C,
6993 					_mm_setzero_ph (),
6994 					__A, __E);
6995 }
6996 
6997 #else
6998 #define _mm_fcmul_round_sch(__A, __B, __D)				\
6999   (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,		\
7000 					    (__v8hf) __B, __D)
7001 
7002 #define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E)		\
7003   (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,		\
7004 						 (__v8hf) __D,		\
7005 						 (__v8hf) __A,		\
7006 						 __B, __E)
7007 
7008 #define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E)			\
7009   (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,		\
7010 						 (__v8hf) __C,		\
7011 						 _mm_setzero_ph (),	\
7012 						 __A, __E)
7013 
7014 #define _mm_fmul_round_sch(__A, __B, __D)				\
7015   (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A,		\
7016 					   (__v8hf) __B, __D)
7017 
7018 #define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E)		\
7019   (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,		\
7020 						(__v8hf) __D,		\
7021 						(__v8hf) __A,		\
7022 						__B, __E)
7023 
7024 #define _mm_maskz_fmul_round_sch(__A, __B, __C, __E)			\
7025   (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,		\
7026 						(__v8hf) __C,		\
7027 						_mm_setzero_ph (),	\
7028 						__A, __E)
7029 
7030 #endif /* __OPTIMIZE__ */
7031 
7032 #define _MM512_REDUCE_OP(op)						\
7033   __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0);	\
7034   __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1);	\
7035   __m256h __T3 = (__T1 op __T2);					\
7036   __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0);	\
7037   __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1);	\
7038   __m128h __T6 = (__T4 op __T5);					\
7039   __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6,		\
7040 		 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 });			\
7041   __m128h __T8 = (__T6 op __T7);					\
7042   __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8,		\
7043 		 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 });			\
7044   __m128h __T10 = __T8 op __T9;					\
7045   return __T10[0] op __T10[1]
7046 
7047 // TODO reduce
7048 extern __inline _Float16
7049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7050 _mm512_reduce_add_ph (__m512h __A)
7051 {
7052    _MM512_REDUCE_OP (+);
7053 }
7054 
7055 extern __inline _Float16
7056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7057 _mm512_reduce_mul_ph (__m512h __A)
7058 {
7059    _MM512_REDUCE_OP (*);
7060 }
7061 
7062 #undef _MM512_REDUCE_OP
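
/* Editorial note (not part of the upstream header): _MM512_REDUCE_OP
   builds a horizontal reduction as a binary tree: the 512-bit vector is
   split into 256-bit and then 128-bit halves, and the remaining 8 lanes
   are folded 8 -> 4 -> 2 -> 1 with __builtin_shuffle, so
   _mm512_reduce_add_ph returns the sum (and _mm512_reduce_mul_ph the
   product) of all 32 _Float16 lanes.  Assuming _mm512_set1_ph from
   earlier in this header:

     _Float16 s = _mm512_reduce_add_ph (_mm512_set1_ph ((_Float16) 1.0f));

   here s is 32.0.  */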
7063 
7064 #ifdef __AVX512VL__
7065 
7066 #define _MM512_REDUCE_OP(op)						\
7067   __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0);	\
7068   __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1);	\
7069   __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2,		\
7070 		 _mm256_setzero_ph (), (__mmask16) -1);		\
7071   __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0);	\
7072   __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1);	\
7073   __m128h __T6 = __builtin_ia32_##op##ph128_mask			\
7074 		 (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1);	\
7075   __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6,		\
7076 		 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 });			\
7077   __m128h __T8 = (__m128h)  __builtin_ia32_##op##ph128_mask		\
7078 		 (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1);	\
7079   __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8,		\
7080 		 (__v8hi) { 4, 5 });					\
7081   __m128h __T10 = __builtin_ia32_##op##ph128_mask			\
7082 		  (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1);	\
7083   __m128h __T11 = (__m128h) __builtin_shuffle (__T10,			\
7084 		  (__v8hi) { 1, 0 });					\
7085   __m128h __T12 = __builtin_ia32_##op##ph128_mask			\
7086 		  (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1);	\
7087   return __T12[0]
7088 
7089 #else
7090 
7091 #define _MM512_REDUCE_OP(op)						\
7092   __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A,		\
7093 		 (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 });			\
7094   __m512h __T2 = _mm512_##op##_ph (__A, __T1);				\
7095   __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2,		\
7096 		 (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 });			\
7097   __m512h __T4 = _mm512_##op##_ph (__T2, __T3);			\
7098   __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4,		\
7099 		 (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 });			\
7100   __m512h __T6 = _mm512_##op##_ph (__T4, __T5);			\
7101   __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6,		\
7102 		 (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0,			\
7103 			     0, 0, 0, 0, 0, 0, 0, 0 });		\
7104   __m512h __T8 = _mm512_##op##_ph (__T6, __T7);			\
7105   __m512h __T9 = (__m512h) __builtin_shuffle (__T8,			\
7106 		 (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0,			\
7107 			     0, 0, 0, 0, 0, 0, 0, 0,			\
7108 			     0, 0, 0, 0, 0, 0, 0, 0,			\
7109 			     0, 0, 0, 0, 0, 0, 0, 0 });		\
7110   __m512h __T10 = _mm512_##op##_ph (__T8, __T9);			\
7111   return __T10[0]
7112 #endif
7113 
7114 extern __inline _Float16
7115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7116 _mm512_reduce_min_ph (__m512h __A)
7117 {
7118   _MM512_REDUCE_OP (min);
7119 }
7120 
7121 extern __inline _Float16
7122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7123 _mm512_reduce_max_ph (__m512h __A)
7124 {
7125   _MM512_REDUCE_OP (max);
7126 }
7127 
7128 #undef _MM512_REDUCE_OP
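
/* Editorial note (not part of the upstream header): the min/max
   reductions follow the same tree scheme, combining lanes with the
   min/max builtins when AVX512VL is available and with whole-vector
   _mm512_min_ph/_mm512_max_ph plus shuffles otherwise, so
   _mm512_reduce_min_ph and _mm512_reduce_max_ph return the smallest and
   largest of the 32 _Float16 lanes; for example
   _mm512_reduce_max_ph (_mm512_set1_ph ((_Float16) 3.0f)) is 3.0.  */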
7129 
7130 extern __inline __m512h
7131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7132 _mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W)
7133 {
7134   return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W,
7135 						    (__v32hi) __A,
7136 						    (__mmask32) __U);
7137 
7138 }
7139 
7140 extern __inline __m512h
7141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142 _mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B)
7143 {
7144   return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
7145 						       (__v32hi) __I,
7146 						       (__v32hi) __B,
7147 						       (__mmask32)-1);
7148 }
7149 
7150 extern __inline __m512h
7151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7152 _mm512_permutexvar_ph (__m512i __A, __m512h __B)
7153 {
7154   return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
7155 						     (__v32hi) __A,
7156 						     (__v32hi)
7157 						     (_mm512_setzero_ph ()),
7158 						     (__mmask32)-1);
7159 }
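
/* Editorial note (not part of the upstream header): _mm512_mask_blend_ph,
   _mm512_permutex2var_ph and _mm512_permutexvar_ph reuse the 16-bit
   integer move/permute builtins, since a _Float16 lane is the same width
   as a short; the bit pattern of each selected lane is copied unchanged.
   For instance, a hypothetical index vector whose 32 16-bit elements are
   all zero makes _mm512_permutexvar_ph broadcast lane 0 of its __m512h
   argument into every lane.  */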
7160 
7161 extern __inline __m512h
7162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7163 _mm512_set1_pch (_Float16 _Complex __A)
7164 {
7165   union
7166   {
7167     _Float16 _Complex __a;
7168     float __b;
7169   } __u = { .__a = __A};
7170 
7171   return (__m512h) _mm512_set1_ps (__u.__b);
7172 }
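
/* Editorial note (not part of the upstream header): a _Float16 _Complex
   value occupies 32 bits (two adjacent _Float16s), so _mm512_set1_pch
   broadcasts it by type-punning the pair to a float through the union
   above and reusing _mm512_set1_ps; only the bit pattern is replicated
   into the 16 complex positions, no floating-point conversion occurs.  */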
7173 
7174 // intrinsics below are aliases for f*mul_*ch
7175 #define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B))
7176 #define _mm512_mask_mul_pch(W, U, A, B)				      \
7177   _mm512_mask_fmul_pch ((W), (U), (A), (B))
7178 #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
7179 #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
7180 #define _mm512_mask_mul_round_pch(W, U, A, B, R)		      \
7181   _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
7182 #define _mm512_maskz_mul_round_pch(U, A, B, R)			      \
7183   _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))
7184 
7185 #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
7186 #define _mm512_mask_cmul_pch(W, U, A, B)			      \
7187   _mm512_mask_fcmul_pch ((W), (U), (A), (B))
7188 #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
7189 #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R))
7190 #define _mm512_mask_cmul_round_pch(W, U, A, B, R)		      \
7191   _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
7192 #define _mm512_maskz_cmul_round_pch(U, A, B, R)			      \
7193   _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
7194 
7195 #define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
7196 #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
7197 #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
7198 #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
7199 #define _mm_mask_mul_round_sch(W, U, A, B, R)			      \
7200   _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
7201 #define _mm_maskz_mul_round_sch(U, A, B, R)			      \
7202   _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
7203 
7204 #define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
7205 #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
7206 #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
7207 #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
7208 #define _mm_mask_cmul_round_sch(W, U, A, B, R)			      \
7209   _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
7210 #define _mm_maskz_cmul_round_sch(U, A, B, R)			      \
7211   _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
7212 
7213 #ifdef __DISABLE_AVX512FP16__
7214 #undef __DISABLE_AVX512FP16__
7215 #pragma GCC pop_options
7216 #endif /* __DISABLE_AVX512FP16__ */
7217 
7218 #endif /* __AVX512FP16INTRIN_H_INCLUDED */
7219