1 /* Copyright (C) 2019-2022 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef __AVX512FP16INTRIN_H_INCLUDED
29 #define __AVX512FP16INTRIN_H_INCLUDED
30
31 #ifndef __AVX512FP16__
32 #pragma GCC push_options
33 #pragma GCC target("avx512fp16")
34 #define __DISABLE_AVX512FP16__
35 #endif /* __AVX512FP16__ */
36
37 /* Internal data types for implementing the intrinsics. */
38 typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
39 typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
40 typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
41
42 /* The Intel API is flexible enough that we must allow aliasing with other
43 vector types, and their scalar components. */
44 typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
45 typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
46 typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
47
48 /* Unaligned version of the same type. */
49 typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), \
50 __may_alias__, __aligned__ (1)));
51 typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), \
52 __may_alias__, __aligned__ (1)));
53 typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
54 __may_alias__, __aligned__ (1)));
55
56 extern __inline __m128h
57 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ph(_Float16 __A7,_Float16 __A6,_Float16 __A5,_Float16 __A4,_Float16 __A3,_Float16 __A2,_Float16 __A1,_Float16 __A0)58 _mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5,
59 _Float16 __A4, _Float16 __A3, _Float16 __A2,
60 _Float16 __A1, _Float16 __A0)
61 {
62 return __extension__ (__m128h)(__v8hf){ __A0, __A1, __A2, __A3,
63 __A4, __A5, __A6, __A7 };
64 }
65
66 extern __inline __m256h
67 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set_ph(_Float16 __A15,_Float16 __A14,_Float16 __A13,_Float16 __A12,_Float16 __A11,_Float16 __A10,_Float16 __A9,_Float16 __A8,_Float16 __A7,_Float16 __A6,_Float16 __A5,_Float16 __A4,_Float16 __A3,_Float16 __A2,_Float16 __A1,_Float16 __A0)68 _mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13,
69 _Float16 __A12, _Float16 __A11, _Float16 __A10,
70 _Float16 __A9, _Float16 __A8, _Float16 __A7,
71 _Float16 __A6, _Float16 __A5, _Float16 __A4,
72 _Float16 __A3, _Float16 __A2, _Float16 __A1,
73 _Float16 __A0)
74 {
75 return __extension__ (__m256h)(__v16hf){ __A0, __A1, __A2, __A3,
76 __A4, __A5, __A6, __A7,
77 __A8, __A9, __A10, __A11,
78 __A12, __A13, __A14, __A15 };
79 }
80
81 extern __inline __m512h
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ph(_Float16 __A31,_Float16 __A30,_Float16 __A29,_Float16 __A28,_Float16 __A27,_Float16 __A26,_Float16 __A25,_Float16 __A24,_Float16 __A23,_Float16 __A22,_Float16 __A21,_Float16 __A20,_Float16 __A19,_Float16 __A18,_Float16 __A17,_Float16 __A16,_Float16 __A15,_Float16 __A14,_Float16 __A13,_Float16 __A12,_Float16 __A11,_Float16 __A10,_Float16 __A9,_Float16 __A8,_Float16 __A7,_Float16 __A6,_Float16 __A5,_Float16 __A4,_Float16 __A3,_Float16 __A2,_Float16 __A1,_Float16 __A0)83 _mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
84 _Float16 __A28, _Float16 __A27, _Float16 __A26,
85 _Float16 __A25, _Float16 __A24, _Float16 __A23,
86 _Float16 __A22, _Float16 __A21, _Float16 __A20,
87 _Float16 __A19, _Float16 __A18, _Float16 __A17,
88 _Float16 __A16, _Float16 __A15, _Float16 __A14,
89 _Float16 __A13, _Float16 __A12, _Float16 __A11,
90 _Float16 __A10, _Float16 __A9, _Float16 __A8,
91 _Float16 __A7, _Float16 __A6, _Float16 __A5,
92 _Float16 __A4, _Float16 __A3, _Float16 __A2,
93 _Float16 __A1, _Float16 __A0)
94 {
95 return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
96 __A4, __A5, __A6, __A7,
97 __A8, __A9, __A10, __A11,
98 __A12, __A13, __A14, __A15,
99 __A16, __A17, __A18, __A19,
100 __A20, __A21, __A22, __A23,
101 __A24, __A25, __A26, __A27,
102 __A28, __A29, __A30, __A31 };
103 }
104
105 /* Create vectors of elements in the reversed order from _mm_set_ph,
106 _mm256_set_ph and _mm512_set_ph functions. */
107
108 extern __inline __m128h
109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_ph(_Float16 __A0,_Float16 __A1,_Float16 __A2,_Float16 __A3,_Float16 __A4,_Float16 __A5,_Float16 __A6,_Float16 __A7)110 _mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
111 _Float16 __A3, _Float16 __A4, _Float16 __A5,
112 _Float16 __A6, _Float16 __A7)
113 {
114 return _mm_set_ph (__A7, __A6, __A5, __A4, __A3, __A2, __A1, __A0);
115 }
116
117 extern __inline __m256h
118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setr_ph(_Float16 __A0,_Float16 __A1,_Float16 __A2,_Float16 __A3,_Float16 __A4,_Float16 __A5,_Float16 __A6,_Float16 __A7,_Float16 __A8,_Float16 __A9,_Float16 __A10,_Float16 __A11,_Float16 __A12,_Float16 __A13,_Float16 __A14,_Float16 __A15)119 _mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
120 _Float16 __A3, _Float16 __A4, _Float16 __A5,
121 _Float16 __A6, _Float16 __A7, _Float16 __A8,
122 _Float16 __A9, _Float16 __A10, _Float16 __A11,
123 _Float16 __A12, _Float16 __A13, _Float16 __A14,
124 _Float16 __A15)
125 {
126 return _mm256_set_ph (__A15, __A14, __A13, __A12, __A11, __A10, __A9,
127 __A8, __A7, __A6, __A5, __A4, __A3, __A2, __A1,
128 __A0);
129 }
130
131 extern __inline __m512h
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setr_ph(_Float16 __A0,_Float16 __A1,_Float16 __A2,_Float16 __A3,_Float16 __A4,_Float16 __A5,_Float16 __A6,_Float16 __A7,_Float16 __A8,_Float16 __A9,_Float16 __A10,_Float16 __A11,_Float16 __A12,_Float16 __A13,_Float16 __A14,_Float16 __A15,_Float16 __A16,_Float16 __A17,_Float16 __A18,_Float16 __A19,_Float16 __A20,_Float16 __A21,_Float16 __A22,_Float16 __A23,_Float16 __A24,_Float16 __A25,_Float16 __A26,_Float16 __A27,_Float16 __A28,_Float16 __A29,_Float16 __A30,_Float16 __A31)133 _mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
134 _Float16 __A3, _Float16 __A4, _Float16 __A5,
135 _Float16 __A6, _Float16 __A7, _Float16 __A8,
136 _Float16 __A9, _Float16 __A10, _Float16 __A11,
137 _Float16 __A12, _Float16 __A13, _Float16 __A14,
138 _Float16 __A15, _Float16 __A16, _Float16 __A17,
139 _Float16 __A18, _Float16 __A19, _Float16 __A20,
140 _Float16 __A21, _Float16 __A22, _Float16 __A23,
141 _Float16 __A24, _Float16 __A25, _Float16 __A26,
142 _Float16 __A27, _Float16 __A28, _Float16 __A29,
143 _Float16 __A30, _Float16 __A31)
144
145 {
146 return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
147 __A24, __A23, __A22, __A21, __A20, __A19, __A18,
148 __A17, __A16, __A15, __A14, __A13, __A12, __A11,
149 __A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
150 __A2, __A1, __A0);
151 }
152
153 /* Broadcast _Float16 to vector. */
154
155 extern __inline __m128h
156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_ph(_Float16 __A)157 _mm_set1_ph (_Float16 __A)
158 {
159 return _mm_set_ph (__A, __A, __A, __A, __A, __A, __A, __A);
160 }
161
162 extern __inline __m256h
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_set1_ph(_Float16 __A)164 _mm256_set1_ph (_Float16 __A)
165 {
166 return _mm256_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
167 __A, __A, __A, __A, __A, __A, __A, __A);
168 }
169
170 extern __inline __m512h
171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ph(_Float16 __A)172 _mm512_set1_ph (_Float16 __A)
173 {
174 return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
175 __A, __A, __A, __A, __A, __A, __A, __A,
176 __A, __A, __A, __A, __A, __A, __A, __A,
177 __A, __A, __A, __A, __A, __A, __A, __A);
178 }
179
180 /* Create a vector with all zeros. */
181
182 extern __inline __m128h
183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ph(void)184 _mm_setzero_ph (void)
185 {
186 return _mm_set1_ph (0.0f);
187 }
188
189 extern __inline __m256h
190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_ph(void)191 _mm256_setzero_ph (void)
192 {
193 return _mm256_set1_ph (0.0f);
194 }
195
196 extern __inline __m512h
197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ph(void)198 _mm512_setzero_ph (void)
199 {
200 return _mm512_set1_ph (0.0f);
201 }
202
203 extern __inline __m128h
204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_ph(void)205 _mm_undefined_ph (void)
206 {
207 #pragma GCC diagnostic push
208 #pragma GCC diagnostic ignored "-Winit-self"
209 __m128h __Y = __Y;
210 #pragma GCC diagnostic pop
211 return __Y;
212 }
213
214 extern __inline __m256h
215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_undefined_ph(void)216 _mm256_undefined_ph (void)
217 {
218 #pragma GCC diagnostic push
219 #pragma GCC diagnostic ignored "-Winit-self"
220 __m256h __Y = __Y;
221 #pragma GCC diagnostic pop
222 return __Y;
223 }
224
225 extern __inline __m512h
226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ph(void)227 _mm512_undefined_ph (void)
228 {
229 #pragma GCC diagnostic push
230 #pragma GCC diagnostic ignored "-Winit-self"
231 __m512h __Y = __Y;
232 #pragma GCC diagnostic pop
233 return __Y;
234 }
235
236 extern __inline _Float16
237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_h(__m128h __A)238 _mm_cvtsh_h (__m128h __A)
239 {
240 return __A[0];
241 }
242
243 extern __inline _Float16
244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsh_h(__m256h __A)245 _mm256_cvtsh_h (__m256h __A)
246 {
247 return __A[0];
248 }
249
250 extern __inline _Float16
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsh_h(__m512h __A)252 _mm512_cvtsh_h (__m512h __A)
253 {
254 return __A[0];
255 }
256
257 extern __inline __m512
258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_ps(__m512h __a)259 _mm512_castph_ps (__m512h __a)
260 {
261 return (__m512) __a;
262 }
263
264 extern __inline __m512d
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_pd(__m512h __a)266 _mm512_castph_pd (__m512h __a)
267 {
268 return (__m512d) __a;
269 }
270
271 extern __inline __m512i
272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph_si512(__m512h __a)273 _mm512_castph_si512 (__m512h __a)
274 {
275 return (__m512i) __a;
276 }
277
278 extern __inline __m128h
279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph128(__m512h __A)280 _mm512_castph512_ph128 (__m512h __A)
281 {
282 union
283 {
284 __m128h __a[4];
285 __m512h __v;
286 } __u = { .__v = __A };
287 return __u.__a[0];
288 }
289
290 extern __inline __m256h
291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph512_ph256(__m512h __A)292 _mm512_castph512_ph256 (__m512h __A)
293 {
294 union
295 {
296 __m256h __a[2];
297 __m512h __v;
298 } __u = { .__v = __A };
299 return __u.__a[0];
300 }
301
302 extern __inline __m512h
303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph128_ph512(__m128h __A)304 _mm512_castph128_ph512 (__m128h __A)
305 {
306 union
307 {
308 __m128h __a[4];
309 __m512h __v;
310 } __u;
311 __u.__a[0] = __A;
312 return __u.__v;
313 }
314
315 extern __inline __m512h
316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castph256_ph512(__m256h __A)317 _mm512_castph256_ph512 (__m256h __A)
318 {
319 union
320 {
321 __m256h __a[2];
322 __m512h __v;
323 } __u;
324 __u.__a[0] = __A;
325 return __u.__v;
326 }
327
328 extern __inline __m512h
329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph128_ph512(__m128h __A)330 _mm512_zextph128_ph512 (__m128h __A)
331 {
332 return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
333 (__m128) __A, 0);
334 }
335
336 extern __inline __m512h
337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_zextph256_ph512(__m256h __A)338 _mm512_zextph256_ph512 (__m256h __A)
339 {
340 return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
341 (__m256d) __A, 0);
342 }
343
344 extern __inline __m512h
345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_ph(__m512 __a)346 _mm512_castps_ph (__m512 __a)
347 {
348 return (__m512h) __a;
349 }
350
351 extern __inline __m512h
352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_ph(__m512d __a)353 _mm512_castpd_ph (__m512d __a)
354 {
355 return (__m512h) __a;
356 }
357
358 extern __inline __m512h
359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_ph(__m512i __a)360 _mm512_castsi512_ph (__m512i __a)
361 {
362 return (__m512h) __a;
363 }
364
365 /* Create a vector with element 0 as F and the rest zero. */
366 extern __inline __m128h
367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_sh(_Float16 __F)368 _mm_set_sh (_Float16 __F)
369 {
370 return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F);
371 }
372
373 /* Create a vector with element 0 as *P and the rest zero. */
374 extern __inline __m128h
375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sh(void const * __P)376 _mm_load_sh (void const *__P)
377 {
378 return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
379 *(_Float16 const *) __P);
380 }
381
382 extern __inline __m512h
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ph(void const * __P)384 _mm512_load_ph (void const *__P)
385 {
386 return *(const __m512h *) __P;
387 }
388
389 extern __inline __m256h
390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ph(void const * __P)391 _mm256_load_ph (void const *__P)
392 {
393 return *(const __m256h *) __P;
394 }
395
396 extern __inline __m128h
397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ph(void const * __P)398 _mm_load_ph (void const *__P)
399 {
400 return *(const __m128h *) __P;
401 }
402
403 extern __inline __m512h
404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_ph(void const * __P)405 _mm512_loadu_ph (void const *__P)
406 {
407 return *(const __m512h_u *) __P;
408 }
409
410 extern __inline __m256h
411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ph(void const * __P)412 _mm256_loadu_ph (void const *__P)
413 {
414 return *(const __m256h_u *) __P;
415 }
416
417 extern __inline __m128h
418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_ph(void const * __P)419 _mm_loadu_ph (void const *__P)
420 {
421 return *(const __m128h_u *) __P;
422 }
423
424 /* Stores the lower _Float16 value. */
425 extern __inline void
426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sh(void * __P,__m128h __A)427 _mm_store_sh (void *__P, __m128h __A)
428 {
429 *(_Float16 *) __P = ((__v8hf)__A)[0];
430 }
431
432 extern __inline void
433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ph(void * __P,__m512h __A)434 _mm512_store_ph (void *__P, __m512h __A)
435 {
436 *(__m512h *) __P = __A;
437 }
438
439 extern __inline void
440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ph(void * __P,__m256h __A)441 _mm256_store_ph (void *__P, __m256h __A)
442 {
443 *(__m256h *) __P = __A;
444 }
445
446 extern __inline void
447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ph(void * __P,__m128h __A)448 _mm_store_ph (void *__P, __m128h __A)
449 {
450 *(__m128h *) __P = __A;
451 }
452
453 extern __inline void
454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_ph(void * __P,__m512h __A)455 _mm512_storeu_ph (void *__P, __m512h __A)
456 {
457 *(__m512h_u *) __P = __A;
458 }
459
460 extern __inline void
461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ph(void * __P,__m256h __A)462 _mm256_storeu_ph (void *__P, __m256h __A)
463 {
464 *(__m256h_u *) __P = __A;
465 }
466
467 extern __inline void
468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_ph(void * __P,__m128h __A)469 _mm_storeu_ph (void *__P, __m128h __A)
470 {
471 *(__m128h_u *) __P = __A;
472 }
473
474 extern __inline __m512h
475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_abs_ph(__m512h __A)476 _mm512_abs_ph (__m512h __A)
477 {
478 return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
479 (__m512i) __A);
480 }
481
482 /* Intrinsics v[add,sub,mul,div]ph. */
483 extern __inline __m512h
484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_ph(__m512h __A,__m512h __B)485 _mm512_add_ph (__m512h __A, __m512h __B)
486 {
487 return (__m512h) ((__v32hf) __A + (__v32hf) __B);
488 }
489
490 extern __inline __m512h
491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)492 _mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
493 {
494 return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
495 }
496
497 extern __inline __m512h
498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_ph(__mmask32 __A,__m512h __B,__m512h __C)499 _mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
500 {
501 return __builtin_ia32_addph512_mask (__B, __C,
502 _mm512_setzero_ph (), __A);
503 }
504
505 extern __inline __m512h
506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_ph(__m512h __A,__m512h __B)507 _mm512_sub_ph (__m512h __A, __m512h __B)
508 {
509 return (__m512h) ((__v32hf) __A - (__v32hf) __B);
510 }
511
512 extern __inline __m512h
513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)514 _mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
515 {
516 return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
517 }
518
519 extern __inline __m512h
520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_ph(__mmask32 __A,__m512h __B,__m512h __C)521 _mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
522 {
523 return __builtin_ia32_subph512_mask (__B, __C,
524 _mm512_setzero_ph (), __A);
525 }
526
527 extern __inline __m512h
528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_ph(__m512h __A,__m512h __B)529 _mm512_mul_ph (__m512h __A, __m512h __B)
530 {
531 return (__m512h) ((__v32hf) __A * (__v32hf) __B);
532 }
533
534 extern __inline __m512h
535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)536 _mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
537 {
538 return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
539 }
540
541 extern __inline __m512h
542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_ph(__mmask32 __A,__m512h __B,__m512h __C)543 _mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
544 {
545 return __builtin_ia32_mulph512_mask (__B, __C,
546 _mm512_setzero_ph (), __A);
547 }
548
549 extern __inline __m512h
550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_ph(__m512h __A,__m512h __B)551 _mm512_div_ph (__m512h __A, __m512h __B)
552 {
553 return (__m512h) ((__v32hf) __A / (__v32hf) __B);
554 }
555
556 extern __inline __m512h
557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)558 _mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
559 {
560 return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
561 }
562
563 extern __inline __m512h
564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_ph(__mmask32 __A,__m512h __B,__m512h __C)565 _mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
566 {
567 return __builtin_ia32_divph512_mask (__B, __C,
568 _mm512_setzero_ph (), __A);
569 }
570
571 #ifdef __OPTIMIZE__
572 extern __inline __m512h
573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ph(__m512h __A,__m512h __B,const int __C)574 _mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
575 {
576 return __builtin_ia32_addph512_mask_round (__A, __B,
577 _mm512_setzero_ph (),
578 (__mmask32) -1, __C);
579 }
580
581 extern __inline __m512h
582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)583 _mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
584 __m512h __D, const int __E)
585 {
586 return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
587 }
588
589 extern __inline __m512h
590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)591 _mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
592 const int __D)
593 {
594 return __builtin_ia32_addph512_mask_round (__B, __C,
595 _mm512_setzero_ph (),
596 __A, __D);
597 }
598
599 extern __inline __m512h
600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ph(__m512h __A,__m512h __B,const int __C)601 _mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
602 {
603 return __builtin_ia32_subph512_mask_round (__A, __B,
604 _mm512_setzero_ph (),
605 (__mmask32) -1, __C);
606 }
607
608 extern __inline __m512h
609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)610 _mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
611 __m512h __D, const int __E)
612 {
613 return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
614 }
615
616 extern __inline __m512h
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)618 _mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
619 const int __D)
620 {
621 return __builtin_ia32_subph512_mask_round (__B, __C,
622 _mm512_setzero_ph (),
623 __A, __D);
624 }
625
626 extern __inline __m512h
627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ph(__m512h __A,__m512h __B,const int __C)628 _mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
629 {
630 return __builtin_ia32_mulph512_mask_round (__A, __B,
631 _mm512_setzero_ph (),
632 (__mmask32) -1, __C);
633 }
634
635 extern __inline __m512h
636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)637 _mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
638 __m512h __D, const int __E)
639 {
640 return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
641 }
642
643 extern __inline __m512h
644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)645 _mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
646 const int __D)
647 {
648 return __builtin_ia32_mulph512_mask_round (__B, __C,
649 _mm512_setzero_ph (),
650 __A, __D);
651 }
652
653 extern __inline __m512h
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ph(__m512h __A,__m512h __B,const int __C)655 _mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
656 {
657 return __builtin_ia32_divph512_mask_round (__A, __B,
658 _mm512_setzero_ph (),
659 (__mmask32) -1, __C);
660 }
661
662 extern __inline __m512h
663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)664 _mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
665 __m512h __D, const int __E)
666 {
667 return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
668 }
669
670 extern __inline __m512h
671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)672 _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
673 const int __D)
674 {
675 return __builtin_ia32_divph512_mask_round (__B, __C,
676 _mm512_setzero_ph (),
677 __A, __D);
678 }
679 #else
680 #define _mm512_add_round_ph(A, B, C) \
681 ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), \
682 _mm512_setzero_ph (), \
683 (__mmask32)-1, (C)))
684
685 #define _mm512_mask_add_round_ph(A, B, C, D, E) \
686 ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
687
688 #define _mm512_maskz_add_round_ph(A, B, C, D) \
689 ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), \
690 _mm512_setzero_ph (), \
691 (A), (D)))
692
693 #define _mm512_sub_round_ph(A, B, C) \
694 ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), \
695 _mm512_setzero_ph (), \
696 (__mmask32)-1, (C)))
697
698 #define _mm512_mask_sub_round_ph(A, B, C, D, E) \
699 ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
700
701 #define _mm512_maskz_sub_round_ph(A, B, C, D) \
702 ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), \
703 _mm512_setzero_ph (), \
704 (A), (D)))
705
706 #define _mm512_mul_round_ph(A, B, C) \
707 ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), \
708 _mm512_setzero_ph (), \
709 (__mmask32)-1, (C)))
710
711 #define _mm512_mask_mul_round_ph(A, B, C, D, E) \
712 ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
713
714 #define _mm512_maskz_mul_round_ph(A, B, C, D) \
715 ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), \
716 _mm512_setzero_ph (), \
717 (A), (D)))
718
719 #define _mm512_div_round_ph(A, B, C) \
720 ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), \
721 _mm512_setzero_ph (), \
722 (__mmask32)-1, (C)))
723
724 #define _mm512_mask_div_round_ph(A, B, C, D, E) \
725 ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
726
727 #define _mm512_maskz_div_round_ph(A, B, C, D) \
728 ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), \
729 _mm512_setzero_ph (), \
730 (A), (D)))
731 #endif /* __OPTIMIZE__ */
732
733 extern __inline __m512h
734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_conj_pch(__m512h __A)735 _mm512_conj_pch (__m512h __A)
736 {
737 return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
738 }
739
740 extern __inline __m512h
741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conj_pch(__m512h __W,__mmask16 __U,__m512h __A)742 _mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
743 {
744 return (__m512h)
745 __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
746 (__v16sf) __W,
747 (__mmask16) __U);
748 }
749
750 extern __inline __m512h
751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conj_pch(__mmask16 __U,__m512h __A)752 _mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
753 {
754 return (__m512h)
755 __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
756 (__v16sf) _mm512_setzero_ps (),
757 (__mmask16) __U);
758 }
759
760 /* Intrinsics of v[add,sub,mul,div]sh. */
761 extern __inline __m128h
762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_sh(__m128h __A,__m128h __B)763 _mm_add_sh (__m128h __A, __m128h __B)
764 {
765 __A[0] += __B[0];
766 return __A;
767 }
768
769 extern __inline __m128h
770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)771 _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
772 {
773 return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
774 }
775
776 extern __inline __m128h
777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_sh(__mmask8 __A,__m128h __B,__m128h __C)778 _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
779 {
780 return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
781 __A);
782 }
783
784 extern __inline __m128h
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_sh(__m128h __A,__m128h __B)786 _mm_sub_sh (__m128h __A, __m128h __B)
787 {
788 __A[0] -= __B[0];
789 return __A;
790 }
791
792 extern __inline __m128h
793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)794 _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
795 {
796 return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
797 }
798
799 extern __inline __m128h
800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_sh(__mmask8 __A,__m128h __B,__m128h __C)801 _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
802 {
803 return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
804 __A);
805 }
806
807 extern __inline __m128h
808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_sh(__m128h __A,__m128h __B)809 _mm_mul_sh (__m128h __A, __m128h __B)
810 {
811 __A[0] *= __B[0];
812 return __A;
813 }
814
815 extern __inline __m128h
816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)817 _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
818 {
819 return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
820 }
821
822 extern __inline __m128h
823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_sh(__mmask8 __A,__m128h __B,__m128h __C)824 _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
825 {
826 return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
827 }
828
829 extern __inline __m128h
830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_sh(__m128h __A,__m128h __B)831 _mm_div_sh (__m128h __A, __m128h __B)
832 {
833 __A[0] /= __B[0];
834 return __A;
835 }
836
837 extern __inline __m128h
838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)839 _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
840 {
841 return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
842 }
843
844 extern __inline __m128h
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_sh(__mmask8 __A,__m128h __B,__m128h __C)846 _mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
847 {
848 return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
849 __A);
850 }
851
852 #ifdef __OPTIMIZE__
853 extern __inline __m128h
854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sh(__m128h __A,__m128h __B,const int __C)855 _mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
856 {
857 return __builtin_ia32_addsh_mask_round (__A, __B,
858 _mm_setzero_ph (),
859 (__mmask8) -1, __C);
860 }
861
862 extern __inline __m128h
863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)864 _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
865 __m128h __D, const int __E)
866 {
867 return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
868 }
869
870 extern __inline __m128h
871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)872 _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
873 const int __D)
874 {
875 return __builtin_ia32_addsh_mask_round (__B, __C,
876 _mm_setzero_ph (),
877 __A, __D);
878 }
879
880 extern __inline __m128h
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sh(__m128h __A,__m128h __B,const int __C)882 _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
883 {
884 return __builtin_ia32_subsh_mask_round (__A, __B,
885 _mm_setzero_ph (),
886 (__mmask8) -1, __C);
887 }
888
889 extern __inline __m128h
890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)891 _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
892 __m128h __D, const int __E)
893 {
894 return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
895 }
896
897 extern __inline __m128h
898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)899 _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
900 const int __D)
901 {
902 return __builtin_ia32_subsh_mask_round (__B, __C,
903 _mm_setzero_ph (),
904 __A, __D);
905 }
906
907 extern __inline __m128h
908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sh(__m128h __A,__m128h __B,const int __C)909 _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
910 {
911 return __builtin_ia32_mulsh_mask_round (__A, __B,
912 _mm_setzero_ph (),
913 (__mmask8) -1, __C);
914 }
915
916 extern __inline __m128h
917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)918 _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
919 __m128h __D, const int __E)
920 {
921 return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
922 }
923
924 extern __inline __m128h
925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)926 _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
927 const int __D)
928 {
929 return __builtin_ia32_mulsh_mask_round (__B, __C,
930 _mm_setzero_ph (),
931 __A, __D);
932 }
933
934 extern __inline __m128h
935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sh(__m128h __A,__m128h __B,const int __C)936 _mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
937 {
938 return __builtin_ia32_divsh_mask_round (__A, __B,
939 _mm_setzero_ph (),
940 (__mmask8) -1, __C);
941 }
942
943 extern __inline __m128h
944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)945 _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
946 __m128h __D, const int __E)
947 {
948 return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
949 }
950
951 extern __inline __m128h
952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)953 _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
954 const int __D)
955 {
956 return __builtin_ia32_divsh_mask_round (__B, __C,
957 _mm_setzero_ph (),
958 __A, __D);
959 }
960 #else
961 #define _mm_add_round_sh(A, B, C) \
962 ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B), \
963 _mm_setzero_ph (), \
964 (__mmask8)-1, (C)))
965
966 #define _mm_mask_add_round_sh(A, B, C, D, E) \
967 ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
968
969 #define _mm_maskz_add_round_sh(A, B, C, D) \
970 ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C), \
971 _mm_setzero_ph (), \
972 (A), (D)))
973
974 #define _mm_sub_round_sh(A, B, C) \
975 ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B), \
976 _mm_setzero_ph (), \
977 (__mmask8)-1, (C)))
978
979 #define _mm_mask_sub_round_sh(A, B, C, D, E) \
980 ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
981
982 #define _mm_maskz_sub_round_sh(A, B, C, D) \
983 ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C), \
984 _mm_setzero_ph (), \
985 (A), (D)))
986
987 #define _mm_mul_round_sh(A, B, C) \
988 ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B), \
989 _mm_setzero_ph (), \
990 (__mmask8)-1, (C)))
991
992 #define _mm_mask_mul_round_sh(A, B, C, D, E) \
993 ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
994
995 #define _mm_maskz_mul_round_sh(A, B, C, D) \
996 ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C), \
997 _mm_setzero_ph (), \
998 (A), (D)))
999
1000 #define _mm_div_round_sh(A, B, C) \
1001 ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B), \
1002 _mm_setzero_ph (), \
1003 (__mmask8)-1, (C)))
1004
1005 #define _mm_mask_div_round_sh(A, B, C, D, E) \
1006 ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
1007
1008 #define _mm_maskz_div_round_sh(A, B, C, D) \
1009 ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C), \
1010 _mm_setzero_ph (), \
1011 (A), (D)))
1012 #endif /* __OPTIMIZE__ */
1013
1014 /* Intrinsic vmaxph vminph. */
1015 extern __inline __m512h
1016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_ph(__m512h __A,__m512h __B)1017 _mm512_max_ph (__m512h __A, __m512h __B)
1018 {
1019 return __builtin_ia32_maxph512_mask (__A, __B,
1020 _mm512_setzero_ph (),
1021 (__mmask32) -1);
1022 }
1023
1024 extern __inline __m512h
1025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)1026 _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
1027 {
1028 return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
1029 }
1030
1031 extern __inline __m512h
1032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_ph(__mmask32 __A,__m512h __B,__m512h __C)1033 _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
1034 {
1035 return __builtin_ia32_maxph512_mask (__B, __C,
1036 _mm512_setzero_ph (), __A);
1037 }
1038
1039 extern __inline __m512h
1040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_ph(__m512h __A,__m512h __B)1041 _mm512_min_ph (__m512h __A, __m512h __B)
1042 {
1043 return __builtin_ia32_minph512_mask (__A, __B,
1044 _mm512_setzero_ph (),
1045 (__mmask32) -1);
1046 }
1047
1048 extern __inline __m512h
1049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)1050 _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
1051 {
1052 return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
1053 }
1054
1055 extern __inline __m512h
1056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_ph(__mmask32 __A,__m512h __B,__m512h __C)1057 _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
1058 {
1059 return __builtin_ia32_minph512_mask (__B, __C,
1060 _mm512_setzero_ph (), __A);
1061 }
1062
1063 #ifdef __OPTIMIZE__
1064 extern __inline __m512h
1065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_ph(__m512h __A,__m512h __B,const int __C)1066 _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
1067 {
1068 return __builtin_ia32_maxph512_mask_round (__A, __B,
1069 _mm512_setzero_ph (),
1070 (__mmask32) -1, __C);
1071 }
1072
1073 extern __inline __m512h
1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)1075 _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1076 __m512h __D, const int __E)
1077 {
1078 return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
1079 }
1080
1081 extern __inline __m512h
1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)1083 _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
1084 const int __D)
1085 {
1086 return __builtin_ia32_maxph512_mask_round (__B, __C,
1087 _mm512_setzero_ph (),
1088 __A, __D);
1089 }
1090
1091 extern __inline __m512h
1092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_ph(__m512h __A,__m512h __B,const int __C)1093 _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
1094 {
1095 return __builtin_ia32_minph512_mask_round (__A, __B,
1096 _mm512_setzero_ph (),
1097 (__mmask32) -1, __C);
1098 }
1099
1100 extern __inline __m512h
1101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)1102 _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1103 __m512h __D, const int __E)
1104 {
1105 return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
1106 }
1107
1108 extern __inline __m512h
1109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)1110 _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
1111 const int __D)
1112 {
1113 return __builtin_ia32_minph512_mask_round (__B, __C,
1114 _mm512_setzero_ph (),
1115 __A, __D);
1116 }
1117
1118 #else
1119 #define _mm512_max_round_ph(A, B, C) \
1120 (__builtin_ia32_maxph512_mask_round ((A), (B), \
1121 _mm512_setzero_ph (), \
1122 (__mmask32)-1, (C)))
1123
1124 #define _mm512_mask_max_round_ph(A, B, C, D, E) \
1125 (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
1126
1127 #define _mm512_maskz_max_round_ph(A, B, C, D) \
1128 (__builtin_ia32_maxph512_mask_round ((B), (C), \
1129 _mm512_setzero_ph (), \
1130 (A), (D)))
1131
1132 #define _mm512_min_round_ph(A, B, C) \
1133 (__builtin_ia32_minph512_mask_round ((A), (B), \
1134 _mm512_setzero_ph (), \
1135 (__mmask32)-1, (C)))
1136
1137 #define _mm512_mask_min_round_ph(A, B, C, D, E) \
1138 (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
1139
1140 #define _mm512_maskz_min_round_ph(A, B, C, D) \
1141 (__builtin_ia32_minph512_mask_round ((B), (C), \
1142 _mm512_setzero_ph (), \
1143 (A), (D)))
1144 #endif /* __OPTIMIZE__ */
1145
1146 /* Intrinsic vmaxsh vminsh. */
1147 extern __inline __m128h
1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_sh(__m128h __A,__m128h __B)1149 _mm_max_sh (__m128h __A, __m128h __B)
1150 {
1151 __A[0] = __A[0] > __B[0] ? __A[0] : __B[0];
1152 return __A;
1153 }
1154
1155 extern __inline __m128h
1156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)1157 _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1158 {
1159 return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
1160 }
1161
1162 extern __inline __m128h
1163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_sh(__mmask8 __A,__m128h __B,__m128h __C)1164 _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
1165 {
1166 return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
1167 __A);
1168 }
1169
1170 extern __inline __m128h
1171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_sh(__m128h __A,__m128h __B)1172 _mm_min_sh (__m128h __A, __m128h __B)
1173 {
1174 __A[0] = __A[0] < __B[0] ? __A[0] : __B[0];
1175 return __A;
1176 }
1177
1178 extern __inline __m128h
1179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)1180 _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1181 {
1182 return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
1183 }
1184
1185 extern __inline __m128h
1186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_sh(__mmask8 __A,__m128h __B,__m128h __C)1187 _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
1188 {
1189 return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
1190 __A);
1191 }
1192
1193 #ifdef __OPTIMIZE__
1194 extern __inline __m128h
1195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sh(__m128h __A,__m128h __B,const int __C)1196 _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
1197 {
1198 return __builtin_ia32_maxsh_mask_round (__A, __B,
1199 _mm_setzero_ph (),
1200 (__mmask8) -1, __C);
1201 }
1202
1203 extern __inline __m128h
1204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)1205 _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1206 __m128h __D, const int __E)
1207 {
1208 return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
1209 }
1210
1211 extern __inline __m128h
1212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)1213 _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1214 const int __D)
1215 {
1216 return __builtin_ia32_maxsh_mask_round (__B, __C,
1217 _mm_setzero_ph (),
1218 __A, __D);
1219 }
1220
1221 extern __inline __m128h
1222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_round_sh(__m128h __A,__m128h __B,const int __C)1223 _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
1224 {
1225 return __builtin_ia32_minsh_mask_round (__A, __B,
1226 _mm_setzero_ph (),
1227 (__mmask8) -1, __C);
1228 }
1229
1230 extern __inline __m128h
1231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)1232 _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1233 __m128h __D, const int __E)
1234 {
1235 return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
1236 }
1237
1238 extern __inline __m128h
1239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)1240 _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1241 const int __D)
1242 {
1243 return __builtin_ia32_minsh_mask_round (__B, __C,
1244 _mm_setzero_ph (),
1245 __A, __D);
1246 }
1247
1248 #else
1249 #define _mm_max_round_sh(A, B, C) \
1250 (__builtin_ia32_maxsh_mask_round ((A), (B), \
1251 _mm_setzero_ph (), \
1252 (__mmask8)-1, (C)))
1253
1254 #define _mm_mask_max_round_sh(A, B, C, D, E) \
1255 (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
1256
1257 #define _mm_maskz_max_round_sh(A, B, C, D) \
1258 (__builtin_ia32_maxsh_mask_round ((B), (C), \
1259 _mm_setzero_ph (), \
1260 (A), (D)))
1261
1262 #define _mm_min_round_sh(A, B, C) \
1263 (__builtin_ia32_minsh_mask_round ((A), (B), \
1264 _mm_setzero_ph (), \
1265 (__mmask8)-1, (C)))
1266
1267 #define _mm_mask_min_round_sh(A, B, C, D, E) \
1268 (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
1269
1270 #define _mm_maskz_min_round_sh(A, B, C, D) \
1271 (__builtin_ia32_minsh_mask_round ((B), (C), \
1272 _mm_setzero_ph (), \
1273 (A), (D)))
1274
1275 #endif /* __OPTIMIZE__ */
1276
1277 /* vcmpph */
1278 #ifdef __OPTIMIZE
1279 extern __inline __mmask32
1280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_ph_mask(__m512h __A,__m512h __B,const int __C)1281 _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
1282 {
1283 return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
1284 (__mmask32) -1);
1285 }
1286
1287 extern __inline __mmask32
1288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_ph_mask(__mmask32 __A,__m512h __B,__m512h __C,const int __D)1289 _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
1290 const int __D)
1291 {
1292 return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
1293 __A);
1294 }
1295
1296 extern __inline __mmask32
1297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_round_ph_mask(__m512h __A,__m512h __B,const int __C,const int __D)1298 _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
1299 const int __D)
1300 {
1301 return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
1302 __C, (__mmask32) -1,
1303 __D);
1304 }
1305
1306 extern __inline __mmask32
1307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_round_ph_mask(__mmask32 __A,__m512h __B,__m512h __C,const int __D,const int __E)1308 _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
1309 const int __D, const int __E)
1310 {
1311 return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
1312 __D, __A,
1313 __E);
1314 }
1315
1316 #else
1317 #define _mm512_cmp_ph_mask(A, B, C) \
1318 (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
1319
1320 #define _mm512_mask_cmp_ph_mask(A, B, C, D) \
1321 (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
1322
1323 #define _mm512_cmp_round_ph_mask(A, B, C, D) \
1324 (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
1325
1326 #define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \
1327 (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
1328
1329 #endif /* __OPTIMIZE__ */
1330
1331 /* Intrinsics vcmpsh. */
1332 #ifdef __OPTIMIZE__
1333 extern __inline __mmask8
1334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sh_mask(__m128h __A,__m128h __B,const int __C)1335 _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
1336 {
1337 return (__mmask8)
1338 __builtin_ia32_cmpsh_mask_round (__A, __B,
1339 __C, (__mmask8) -1,
1340 _MM_FROUND_CUR_DIRECTION);
1341 }
1342
1343 extern __inline __mmask8
1344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_sh_mask(__mmask8 __A,__m128h __B,__m128h __C,const int __D)1345 _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
1346 const int __D)
1347 {
1348 return (__mmask8)
1349 __builtin_ia32_cmpsh_mask_round (__B, __C,
1350 __D, __A,
1351 _MM_FROUND_CUR_DIRECTION);
1352 }
1353
1354 extern __inline __mmask8
1355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_round_sh_mask(__m128h __A,__m128h __B,const int __C,const int __D)1356 _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
1357 const int __D)
1358 {
1359 return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
1360 __C, (__mmask8) -1,
1361 __D);
1362 }
1363
1364 extern __inline __mmask8
1365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_round_sh_mask(__mmask8 __A,__m128h __B,__m128h __C,const int __D,const int __E)1366 _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
1367 const int __D, const int __E)
1368 {
1369 return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
1370 __D, __A,
1371 __E);
1372 }
1373
1374 #else
1375 #define _mm_cmp_sh_mask(A, B, C) \
1376 (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), \
1377 (_MM_FROUND_CUR_DIRECTION)))
1378
1379 #define _mm_mask_cmp_sh_mask(A, B, C, D) \
1380 (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), \
1381 (_MM_FROUND_CUR_DIRECTION)))
1382
1383 #define _mm_cmp_round_sh_mask(A, B, C, D) \
1384 (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
1385
1386 #define _mm_mask_cmp_round_sh_mask(A, B, C, D, E) \
1387 (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
1388
1389 #endif /* __OPTIMIZE__ */
1390
1391 /* Intrinsics vcomish. */
1392 extern __inline int
1393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_sh(__m128h __A,__m128h __B)1394 _mm_comieq_sh (__m128h __A, __m128h __B)
1395 {
1396 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
1397 (__mmask8) -1,
1398 _MM_FROUND_CUR_DIRECTION);
1399 }
1400
1401 extern __inline int
1402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comilt_sh(__m128h __A,__m128h __B)1403 _mm_comilt_sh (__m128h __A, __m128h __B)
1404 {
1405 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
1406 (__mmask8) -1,
1407 _MM_FROUND_CUR_DIRECTION);
1408 }
1409
1410 extern __inline int
1411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comile_sh(__m128h __A,__m128h __B)1412 _mm_comile_sh (__m128h __A, __m128h __B)
1413 {
1414 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
1415 (__mmask8) -1,
1416 _MM_FROUND_CUR_DIRECTION);
1417 }
1418
1419 extern __inline int
1420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comigt_sh(__m128h __A,__m128h __B)1421 _mm_comigt_sh (__m128h __A, __m128h __B)
1422 {
1423 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
1424 (__mmask8) -1,
1425 _MM_FROUND_CUR_DIRECTION);
1426 }
1427
1428 extern __inline int
1429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comige_sh(__m128h __A,__m128h __B)1430 _mm_comige_sh (__m128h __A, __m128h __B)
1431 {
1432 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
1433 (__mmask8) -1,
1434 _MM_FROUND_CUR_DIRECTION);
1435 }
1436
1437 extern __inline int
1438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comineq_sh(__m128h __A,__m128h __B)1439 _mm_comineq_sh (__m128h __A, __m128h __B)
1440 {
1441 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
1442 (__mmask8) -1,
1443 _MM_FROUND_CUR_DIRECTION);
1444 }
1445
1446 extern __inline int
1447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomieq_sh(__m128h __A,__m128h __B)1448 _mm_ucomieq_sh (__m128h __A, __m128h __B)
1449 {
1450 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
1451 (__mmask8) -1,
1452 _MM_FROUND_CUR_DIRECTION);
1453 }
1454
1455 extern __inline int
1456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomilt_sh(__m128h __A,__m128h __B)1457 _mm_ucomilt_sh (__m128h __A, __m128h __B)
1458 {
1459 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
1460 (__mmask8) -1,
1461 _MM_FROUND_CUR_DIRECTION);
1462 }
1463
1464 extern __inline int
1465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomile_sh(__m128h __A,__m128h __B)1466 _mm_ucomile_sh (__m128h __A, __m128h __B)
1467 {
1468 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
1469 (__mmask8) -1,
1470 _MM_FROUND_CUR_DIRECTION);
1471 }
1472
1473 extern __inline int
1474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomigt_sh(__m128h __A,__m128h __B)1475 _mm_ucomigt_sh (__m128h __A, __m128h __B)
1476 {
1477 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
1478 (__mmask8) -1,
1479 _MM_FROUND_CUR_DIRECTION);
1480 }
1481
1482 extern __inline int
1483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomige_sh(__m128h __A,__m128h __B)1484 _mm_ucomige_sh (__m128h __A, __m128h __B)
1485 {
1486 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
1487 (__mmask8) -1,
1488 _MM_FROUND_CUR_DIRECTION);
1489 }
1490
1491 extern __inline int
1492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomineq_sh(__m128h __A,__m128h __B)1493 _mm_ucomineq_sh (__m128h __A, __m128h __B)
1494 {
1495 return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
1496 (__mmask8) -1,
1497 _MM_FROUND_CUR_DIRECTION);
1498 }
1499
1500 #ifdef __OPTIMIZE__
1501 extern __inline int
1502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_sh(__m128h __A,__m128h __B,const int __P)1503 _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
1504 {
1505 return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
1506 (__mmask8) -1,
1507 _MM_FROUND_CUR_DIRECTION);
1508 }
1509
1510 extern __inline int
1511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_sh(__m128h __A,__m128h __B,const int __P,const int __R)1512 _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
1513 {
1514 return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
1515 (__mmask8) -1,__R);
1516 }
1517
1518 #else
1519 #define _mm_comi_round_sh(A, B, P, R) \
1520 (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
1521 #define _mm_comi_sh(A, B, P) \
1522 (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), \
1523 _MM_FROUND_CUR_DIRECTION))
1524
1525 #endif /* __OPTIMIZE__ */
1526
1527 /* Intrinsics vsqrtph. */
1528 extern __inline __m512h
1529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_ph(__m512h __A)1530 _mm512_sqrt_ph (__m512h __A)
1531 {
1532 return __builtin_ia32_sqrtph512_mask_round (__A,
1533 _mm512_setzero_ph(),
1534 (__mmask32) -1,
1535 _MM_FROUND_CUR_DIRECTION);
1536 }
1537
1538 extern __inline __m512h
1539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_ph(__m512h __A,__mmask32 __B,__m512h __C)1540 _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
1541 {
1542 return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
1543 _MM_FROUND_CUR_DIRECTION);
1544 }
1545
1546 extern __inline __m512h
1547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_ph(__mmask32 __A,__m512h __B)1548 _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
1549 {
1550 return __builtin_ia32_sqrtph512_mask_round (__B,
1551 _mm512_setzero_ph (),
1552 __A,
1553 _MM_FROUND_CUR_DIRECTION);
1554 }
1555
1556 #ifdef __OPTIMIZE__
1557 extern __inline __m512h
1558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ph(__m512h __A,const int __B)1559 _mm512_sqrt_round_ph (__m512h __A, const int __B)
1560 {
1561 return __builtin_ia32_sqrtph512_mask_round (__A,
1562 _mm512_setzero_ph(),
1563 (__mmask32) -1, __B);
1564 }
1565
1566 extern __inline __m512h
1567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ph(__m512h __A,__mmask32 __B,__m512h __C,const int __D)1568 _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1569 const int __D)
1570 {
1571 return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
1572 }
1573
1574 extern __inline __m512h
1575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ph(__mmask32 __A,__m512h __B,const int __C)1576 _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
1577 {
1578 return __builtin_ia32_sqrtph512_mask_round (__B,
1579 _mm512_setzero_ph (),
1580 __A, __C);
1581 }
1582
1583 #else
1584 #define _mm512_sqrt_round_ph(A, B) \
1585 (__builtin_ia32_sqrtph512_mask_round ((A), \
1586 _mm512_setzero_ph (), \
1587 (__mmask32)-1, (B)))
1588
1589 #define _mm512_mask_sqrt_round_ph(A, B, C, D) \
1590 (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
1591
1592 #define _mm512_maskz_sqrt_round_ph(A, B, C) \
1593 (__builtin_ia32_sqrtph512_mask_round ((B), \
1594 _mm512_setzero_ph (), \
1595 (A), (C)))
1596
1597 #endif /* __OPTIMIZE__ */
1598
1599 /* Intrinsics vrsqrtph. */
1600 extern __inline __m512h
1601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt_ph(__m512h __A)1602 _mm512_rsqrt_ph (__m512h __A)
1603 {
1604 return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
1605 (__mmask32) -1);
1606 }
1607
1608 extern __inline __m512h
1609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt_ph(__m512h __A,__mmask32 __B,__m512h __C)1610 _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
1611 {
1612 return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
1613 }
1614
1615 extern __inline __m512h
1616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt_ph(__mmask32 __A,__m512h __B)1617 _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
1618 {
1619 return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
1620 __A);
1621 }
1622
1623 /* Intrinsics vrsqrtsh. */
1624 extern __inline __m128h
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_sh(__m128h __A,__m128h __B)1626 _mm_rsqrt_sh (__m128h __A, __m128h __B)
1627 {
1628 return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
1629 (__mmask8) -1);
1630 }
1631
1632 extern __inline __m128h
1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)1634 _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1635 {
1636 return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
1637 }
1638
1639 extern __inline __m128h
1640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt_sh(__mmask8 __A,__m128h __B,__m128h __C)1641 _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
1642 {
1643 return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
1644 __A);
1645 }
1646
1647 /* Intrinsics vsqrtsh. */
1648 extern __inline __m128h
1649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_sh(__m128h __A,__m128h __B)1650 _mm_sqrt_sh (__m128h __A, __m128h __B)
1651 {
1652 return __builtin_ia32_sqrtsh_mask_round (__B, __A,
1653 _mm_setzero_ph (),
1654 (__mmask8) -1,
1655 _MM_FROUND_CUR_DIRECTION);
1656 }
1657
1658 extern __inline __m128h
1659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)1660 _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1661 {
1662 return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
1663 _MM_FROUND_CUR_DIRECTION);
1664 }
1665
1666 extern __inline __m128h
1667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_sh(__mmask8 __A,__m128h __B,__m128h __C)1668 _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
1669 {
1670 return __builtin_ia32_sqrtsh_mask_round (__C, __B,
1671 _mm_setzero_ph (),
1672 __A, _MM_FROUND_CUR_DIRECTION);
1673 }
1674
1675 #ifdef __OPTIMIZE__
1676 extern __inline __m128h
1677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sh(__m128h __A,__m128h __B,const int __C)1678 _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
1679 {
1680 return __builtin_ia32_sqrtsh_mask_round (__B, __A,
1681 _mm_setzero_ph (),
1682 (__mmask8) -1, __C);
1683 }
1684
1685 extern __inline __m128h
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)1687 _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1688 __m128h __D, const int __E)
1689 {
1690 return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
1691 __E);
1692 }
1693
1694 extern __inline __m128h
1695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)1696 _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1697 const int __D)
1698 {
1699 return __builtin_ia32_sqrtsh_mask_round (__C, __B,
1700 _mm_setzero_ph (),
1701 __A, __D);
1702 }
1703
1704 #else
1705 #define _mm_sqrt_round_sh(A, B, C) \
1706 (__builtin_ia32_sqrtsh_mask_round ((B), (A), \
1707 _mm_setzero_ph (), \
1708 (__mmask8)-1, (C)))
1709
1710 #define _mm_mask_sqrt_round_sh(A, B, C, D, E) \
1711 (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
1712
1713 #define _mm_maskz_sqrt_round_sh(A, B, C, D) \
1714 (__builtin_ia32_sqrtsh_mask_round ((C), (B), \
1715 _mm_setzero_ph (), \
1716 (A), (D)))
1717
1718 #endif /* __OPTIMIZE__ */
1719
1720 /* Intrinsics vrcpph. */
1721 extern __inline __m512h
1722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp_ph(__m512h __A)1723 _mm512_rcp_ph (__m512h __A)
1724 {
1725 return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
1726 (__mmask32) -1);
1727 }
1728
1729 extern __inline __m512h
1730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp_ph(__m512h __A,__mmask32 __B,__m512h __C)1731 _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
1732 {
1733 return __builtin_ia32_rcpph512_mask (__C, __A, __B);
1734 }
1735
1736 extern __inline __m512h
1737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp_ph(__mmask32 __A,__m512h __B)1738 _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
1739 {
1740 return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
1741 __A);
1742 }
1743
1744 /* Intrinsics vrcpsh. */
1745 extern __inline __m128h
1746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_sh(__m128h __A,__m128h __B)1747 _mm_rcp_sh (__m128h __A, __m128h __B)
1748 {
1749 return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
1750 (__mmask8) -1);
1751 }
1752
1753 extern __inline __m128h
1754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp_sh(__m128h __A,__mmask32 __B,__m128h __C,__m128h __D)1755 _mm_mask_rcp_sh (__m128h __A, __mmask32 __B, __m128h __C, __m128h __D)
1756 {
1757 return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
1758 }
1759
1760 extern __inline __m128h
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp_sh(__mmask32 __A,__m128h __B,__m128h __C)1762 _mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C)
1763 {
1764 return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
1765 __A);
1766 }
1767
1768 /* Intrinsics vscalefph. */
1769 extern __inline __m512h
1770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_ph(__m512h __A,__m512h __B)1771 _mm512_scalef_ph (__m512h __A, __m512h __B)
1772 {
1773 return __builtin_ia32_scalefph512_mask_round (__A, __B,
1774 _mm512_setzero_ph (),
1775 (__mmask32) -1,
1776 _MM_FROUND_CUR_DIRECTION);
1777 }
1778
1779 extern __inline __m512h
1780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D)1781 _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
1782 {
1783 return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
1784 _MM_FROUND_CUR_DIRECTION);
1785 }
1786
1787 extern __inline __m512h
1788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_ph(__mmask32 __A,__m512h __B,__m512h __C)1789 _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
1790 {
1791 return __builtin_ia32_scalefph512_mask_round (__B, __C,
1792 _mm512_setzero_ph (),
1793 __A,
1794 _MM_FROUND_CUR_DIRECTION);
1795 }
1796
1797 #ifdef __OPTIMIZE__
1798 extern __inline __m512h
1799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_round_ph(__m512h __A,__m512h __B,const int __C)1800 _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
1801 {
1802 return __builtin_ia32_scalefph512_mask_round (__A, __B,
1803 _mm512_setzero_ph (),
1804 (__mmask32) -1, __C);
1805 }
1806
1807 extern __inline __m512h
1808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_round_ph(__m512h __A,__mmask32 __B,__m512h __C,__m512h __D,const int __E)1809 _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1810 __m512h __D, const int __E)
1811 {
1812 return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
1813 __E);
1814 }
1815
1816 extern __inline __m512h
1817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_round_ph(__mmask32 __A,__m512h __B,__m512h __C,const int __D)1818 _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
1819 const int __D)
1820 {
1821 return __builtin_ia32_scalefph512_mask_round (__B, __C,
1822 _mm512_setzero_ph (),
1823 __A, __D);
1824 }
1825
1826 #else
1827 #define _mm512_scalef_round_ph(A, B, C) \
1828 (__builtin_ia32_scalefph512_mask_round ((A), (B), \
1829 _mm512_setzero_ph (), \
1830 (__mmask32)-1, (C)))
1831
1832 #define _mm512_mask_scalef_round_ph(A, B, C, D, E) \
1833 (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
1834
1835 #define _mm512_maskz_scalef_round_ph(A, B, C, D) \
1836 (__builtin_ia32_scalefph512_mask_round ((B), (C), \
1837 _mm512_setzero_ph (), \
1838 (A), (D)))
1839
1840 #endif /* __OPTIMIZE__ */
1841
1842 /* Intrinsics vscalefsh. */
1843 extern __inline __m128h
1844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_sh(__m128h __A,__m128h __B)1845 _mm_scalef_sh (__m128h __A, __m128h __B)
1846 {
1847 return __builtin_ia32_scalefsh_mask_round (__A, __B,
1848 _mm_setzero_ph (),
1849 (__mmask8) -1,
1850 _MM_FROUND_CUR_DIRECTION);
1851 }
1852
1853 extern __inline __m128h
1854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)1855 _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
1856 {
1857 return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
1858 _MM_FROUND_CUR_DIRECTION);
1859 }
1860
1861 extern __inline __m128h
1862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_sh(__mmask8 __A,__m128h __B,__m128h __C)1863 _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
1864 {
1865 return __builtin_ia32_scalefsh_mask_round (__B, __C,
1866 _mm_setzero_ph (),
1867 __A,
1868 _MM_FROUND_CUR_DIRECTION);
1869 }
1870
1871 #ifdef __OPTIMIZE__
1872 extern __inline __m128h
1873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_sh(__m128h __A,__m128h __B,const int __C)1874 _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
1875 {
1876 return __builtin_ia32_scalefsh_mask_round (__A, __B,
1877 _mm_setzero_ph (),
1878 (__mmask8) -1, __C);
1879 }
1880
1881 extern __inline __m128h
1882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)1883 _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
1884 __m128h __D, const int __E)
1885 {
1886 return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
1887 __E);
1888 }
1889
1890 extern __inline __m128h
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_round_sh(__mmask8 __A,__m128h __B,__m128h __C,const int __D)1892 _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
1893 const int __D)
1894 {
1895 return __builtin_ia32_scalefsh_mask_round (__B, __C,
1896 _mm_setzero_ph (),
1897 __A, __D);
1898 }
1899
1900 #else
1901 #define _mm_scalef_round_sh(A, B, C) \
1902 (__builtin_ia32_scalefsh_mask_round ((A), (B), \
1903 _mm_setzero_ph (), \
1904 (__mmask8)-1, (C)))
1905
1906 #define _mm_mask_scalef_round_sh(A, B, C, D, E) \
1907 (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
1908
1909 #define _mm_maskz_scalef_round_sh(A, B, C, D) \
1910 (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (), \
1911 (A), (D)))
1912
1913 #endif /* __OPTIMIZE__ */
1914
1915 /* Intrinsics vreduceph. */
1916 #ifdef __OPTIMIZE__
1917 extern __inline __m512h
1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_ph(__m512h __A,int __B)1919 _mm512_reduce_ph (__m512h __A, int __B)
1920 {
1921 return __builtin_ia32_reduceph512_mask_round (__A, __B,
1922 _mm512_setzero_ph (),
1923 (__mmask32) -1,
1924 _MM_FROUND_CUR_DIRECTION);
1925 }
1926
1927 extern __inline __m512h
1928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_ph(__m512h __A,__mmask32 __B,__m512h __C,int __D)1929 _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
1930 {
1931 return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
1932 _MM_FROUND_CUR_DIRECTION);
1933 }
1934
1935 extern __inline __m512h
1936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_ph(__mmask32 __A,__m512h __B,int __C)1937 _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
1938 {
1939 return __builtin_ia32_reduceph512_mask_round (__B, __C,
1940 _mm512_setzero_ph (),
1941 __A,
1942 _MM_FROUND_CUR_DIRECTION);
1943 }
1944
1945 extern __inline __m512h
1946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_round_ph(__m512h __A,int __B,const int __C)1947 _mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
1948 {
1949 return __builtin_ia32_reduceph512_mask_round (__A, __B,
1950 _mm512_setzero_ph (),
1951 (__mmask32) -1, __C);
1952 }
1953
1954 extern __inline __m512h
1955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_round_ph(__m512h __A,__mmask32 __B,__m512h __C,int __D,const int __E)1956 _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
1957 int __D, const int __E)
1958 {
1959 return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
1960 __E);
1961 }
1962
1963 extern __inline __m512h
1964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_reduce_round_ph(__mmask32 __A,__m512h __B,int __C,const int __D)1965 _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
1966 const int __D)
1967 {
1968 return __builtin_ia32_reduceph512_mask_round (__B, __C,
1969 _mm512_setzero_ph (),
1970 __A, __D);
1971 }
1972
1973 #else
1974 #define _mm512_reduce_ph(A, B) \
1975 (__builtin_ia32_reduceph512_mask_round ((A), (B), \
1976 _mm512_setzero_ph (), \
1977 (__mmask32)-1, \
1978 _MM_FROUND_CUR_DIRECTION))
1979
1980 #define _mm512_mask_reduce_ph(A, B, C, D) \
1981 (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), \
1982 _MM_FROUND_CUR_DIRECTION))
1983
1984 #define _mm512_maskz_reduce_ph(A, B, C) \
1985 (__builtin_ia32_reduceph512_mask_round ((B), (C), \
1986 _mm512_setzero_ph (), \
1987 (A), _MM_FROUND_CUR_DIRECTION))
1988
1989 #define _mm512_reduce_round_ph(A, B, C) \
1990 (__builtin_ia32_reduceph512_mask_round ((A), (B), \
1991 _mm512_setzero_ph (), \
1992 (__mmask32)-1, (C)))
1993
1994 #define _mm512_mask_reduce_round_ph(A, B, C, D, E) \
1995 (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
1996
1997 #define _mm512_maskz_reduce_round_ph(A, B, C, D) \
1998 (__builtin_ia32_reduceph512_mask_round ((B), (C), \
1999 _mm512_setzero_ph (), \
2000 (A), (D)))
2001
2002 #endif /* __OPTIMIZE__ */
2003
2004 /* Intrinsics vreducesh. */
2005 #ifdef __OPTIMIZE__
2006 extern __inline __m128h
2007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_sh(__m128h __A,__m128h __B,int __C)2008 _mm_reduce_sh (__m128h __A, __m128h __B, int __C)
2009 {
2010 return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
2011 _mm_setzero_ph (),
2012 (__mmask8) -1,
2013 _MM_FROUND_CUR_DIRECTION);
2014 }
2015
2016 extern __inline __m128h
2017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,int __E)2018 _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
2019 __m128h __D, int __E)
2020 {
2021 return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
2022 _MM_FROUND_CUR_DIRECTION);
2023 }
2024
2025 extern __inline __m128h
2026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_sh(__mmask8 __A,__m128h __B,__m128h __C,int __D)2027 _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
2028 {
2029 return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
2030 _mm_setzero_ph (), __A,
2031 _MM_FROUND_CUR_DIRECTION);
2032 }
2033
2034 extern __inline __m128h
2035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_round_sh(__m128h __A,__m128h __B,int __C,const int __D)2036 _mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
2037 {
2038 return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
2039 _mm_setzero_ph (),
2040 (__mmask8) -1, __D);
2041 }
2042
2043 extern __inline __m128h
2044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,int __E,const int __F)2045 _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
2046 __m128h __D, int __E, const int __F)
2047 {
2048 return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
2049 __B, __F);
2050 }
2051
2052 extern __inline __m128h
2053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_round_sh(__mmask8 __A,__m128h __B,__m128h __C,int __D,const int __E)2054 _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
2055 int __D, const int __E)
2056 {
2057 return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
2058 _mm_setzero_ph (),
2059 __A, __E);
2060 }
2061
2062 #else
2063 #define _mm_reduce_sh(A, B, C) \
2064 (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \
2065 _mm_setzero_ph (), \
2066 (__mmask8)-1, \
2067 _MM_FROUND_CUR_DIRECTION))
2068
2069 #define _mm_mask_reduce_sh(A, B, C, D, E) \
2070 (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), \
2071 _MM_FROUND_CUR_DIRECTION))
2072
2073 #define _mm_maskz_reduce_sh(A, B, C, D) \
2074 (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \
2075 _mm_setzero_ph (), \
2076 (A), _MM_FROUND_CUR_DIRECTION))
2077
2078 #define _mm_reduce_round_sh(A, B, C, D) \
2079 (__builtin_ia32_reducesh_mask_round ((A), (B), (C), \
2080 _mm_setzero_ph (), \
2081 (__mmask8)-1, (D)))
2082
2083 #define _mm_mask_reduce_round_sh(A, B, C, D, E, F) \
2084 (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
2085
2086 #define _mm_maskz_reduce_round_sh(A, B, C, D, E) \
2087 (__builtin_ia32_reducesh_mask_round ((B), (C), (D), \
2088 _mm_setzero_ph (), \
2089 (A), (E)))
2090
2091 #endif /* __OPTIMIZE__ */
2092
2093 /* Intrinsics vrndscaleph. */
2094 #ifdef __OPTIMIZE__
2095 extern __inline __m512h
2096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_ph(__m512h __A,int __B)2097 _mm512_roundscale_ph (__m512h __A, int __B)
2098 {
2099 return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
2100 _mm512_setzero_ph (),
2101 (__mmask32) -1,
2102 _MM_FROUND_CUR_DIRECTION);
2103 }
2104
2105 extern __inline __m512h
2106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_ph(__m512h __A,__mmask32 __B,__m512h __C,int __D)2107 _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
2108 __m512h __C, int __D)
2109 {
2110 return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
2111 _MM_FROUND_CUR_DIRECTION);
2112 }
2113
2114 extern __inline __m512h
2115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_ph(__mmask32 __A,__m512h __B,int __C)2116 _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
2117 {
2118 return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
2119 _mm512_setzero_ph (),
2120 __A,
2121 _MM_FROUND_CUR_DIRECTION);
2122 }
2123
2124 extern __inline __m512h
2125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_ph(__m512h __A,int __B,const int __C)2126 _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
2127 {
2128 return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
2129 _mm512_setzero_ph (),
2130 (__mmask32) -1,
2131 __C);
2132 }
2133
2134 extern __inline __m512h
2135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_ph(__m512h __A,__mmask32 __B,__m512h __C,int __D,const int __E)2136 _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
2137 __m512h __C, int __D, const int __E)
2138 {
2139 return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
2140 __B, __E);
2141 }
2142
2143 extern __inline __m512h
2144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_ph(__mmask32 __A,__m512h __B,int __C,const int __D)2145 _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
2146 const int __D)
2147 {
2148 return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
2149 _mm512_setzero_ph (),
2150 __A, __D);
2151 }
2152
2153 #else
2154 #define _mm512_roundscale_ph(A, B) \
2155 (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
2156 _mm512_setzero_ph (), \
2157 (__mmask32)-1, \
2158 _MM_FROUND_CUR_DIRECTION))
2159
2160 #define _mm512_mask_roundscale_ph(A, B, C, D) \
2161 (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), \
2162 _MM_FROUND_CUR_DIRECTION))
2163
2164 #define _mm512_maskz_roundscale_ph(A, B, C) \
2165 (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
2166 _mm512_setzero_ph (), \
2167 (A), \
2168 _MM_FROUND_CUR_DIRECTION))
2169 #define _mm512_roundscale_round_ph(A, B, C) \
2170 (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
2171 _mm512_setzero_ph (), \
2172 (__mmask32)-1, (C)))
2173
2174 #define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \
2175 (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
2176
2177 #define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
2178 (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
2179 _mm512_setzero_ph (), \
2180 (A), (D)))
2181
2182 #endif /* __OPTIMIZE__ */
2183
2184 /* Intrinsics vrndscalesh. */
2185 #ifdef __OPTIMIZE__
2186 extern __inline __m128h
2187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_sh(__m128h __A,__m128h __B,int __C)2188 _mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
2189 {
2190 return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
2191 _mm_setzero_ph (),
2192 (__mmask8) -1,
2193 _MM_FROUND_CUR_DIRECTION);
2194 }
2195
2196 extern __inline __m128h
2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,int __E)2198 _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
2199 __m128h __D, int __E)
2200 {
2201 return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
2202 _MM_FROUND_CUR_DIRECTION);
2203 }
2204
2205 extern __inline __m128h
2206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_sh(__mmask8 __A,__m128h __B,__m128h __C,int __D)2207 _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
2208 {
2209 return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
2210 _mm_setzero_ph (), __A,
2211 _MM_FROUND_CUR_DIRECTION);
2212 }
2213
2214 extern __inline __m128h
2215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_round_sh(__m128h __A,__m128h __B,int __C,const int __D)2216 _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
2217 {
2218 return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
2219 _mm_setzero_ph (),
2220 (__mmask8) -1,
2221 __D);
2222 }
2223
2224 extern __inline __m128h
2225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_round_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,int __E,const int __F)2226 _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
2227 __m128h __D, int __E, const int __F)
2228 {
2229 return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
2230 __A, __B, __F);
2231 }
2232
2233 extern __inline __m128h
2234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_round_sh(__mmask8 __A,__m128h __B,__m128h __C,int __D,const int __E)2235 _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
2236 int __D, const int __E)
2237 {
2238 return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
2239 _mm_setzero_ph (),
2240 __A, __E);
2241 }
2242
2243 #else
2244 #define _mm_roundscale_sh(A, B, C) \
2245 (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \
2246 _mm_setzero_ph (), \
2247 (__mmask8)-1, \
2248 _MM_FROUND_CUR_DIRECTION))
2249
2250 #define _mm_mask_roundscale_sh(A, B, C, D, E) \
2251 (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), \
2252 _MM_FROUND_CUR_DIRECTION))
2253
2254 #define _mm_maskz_roundscale_sh(A, B, C, D) \
2255 (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \
2256 _mm_setzero_ph (), \
2257 (A), _MM_FROUND_CUR_DIRECTION))
2258
2259 #define _mm_roundscale_round_sh(A, B, C, D) \
2260 (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C), \
2261 _mm_setzero_ph (), \
2262 (__mmask8)-1, (D)))
2263
2264 #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F) \
2265 (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
2266
2267 #define _mm_maskz_roundscale_round_sh(A, B, C, D, E) \
2268 (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D), \
2269 _mm_setzero_ph (), \
2270 (A), (E)))
2271
2272 #endif /* __OPTIMIZE__ */
2273
2274 /* Intrinsics vfpclasssh. */
2275 #ifdef __OPTIMIZE__
2276 extern __inline __mmask8
2277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_sh_mask(__m128h __A,const int __imm)2278 _mm_fpclass_sh_mask (__m128h __A, const int __imm)
2279 {
2280 return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
2281 (__mmask8) -1);
2282 }
2283
2284 extern __inline __mmask8
2285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_sh_mask(__mmask8 __U,__m128h __A,const int __imm)2286 _mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
2287 {
2288 return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm, __U);
2289 }
2290
2291 #else
2292 #define _mm_fpclass_sh_mask(X, C) \
2293 ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
2294 (int) (C), (__mmask8) (-1))) \
2295
2296 #define _mm_mask_fpclass_sh_mask(U, X, C) \
2297 ((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
2298 (int) (C), (__mmask8) (U)))
2299 #endif /* __OPTIMIZE__ */
2300
2301 /* Intrinsics vfpclassph. */
2302 #ifdef __OPTIMIZE__
2303 extern __inline __mmask32
2304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fpclass_ph_mask(__mmask32 __U,__m512h __A,const int __imm)2305 _mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
2306 const int __imm)
2307 {
2308 return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
2309 __imm, __U);
2310 }
2311
2312 extern __inline __mmask32
2313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fpclass_ph_mask(__m512h __A,const int __imm)2314 _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
2315 {
2316 return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
2317 __imm,
2318 (__mmask32) -1);
2319 }
2320
2321 #else
2322 #define _mm512_mask_fpclass_ph_mask(u, x, c) \
2323 ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
2324 (int) (c),(__mmask8)(u)))
2325
2326 #define _mm512_fpclass_ph_mask(x, c) \
2327 ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
2328 (int) (c),(__mmask8)-1))
2329 #endif /* __OPIMTIZE__ */
2330
2331 /* Intrinsics vgetexpph, vgetexpsh. */
2332 extern __inline __m128h
2333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_sh(__m128h __A,__m128h __B)2334 _mm_getexp_sh (__m128h __A, __m128h __B)
2335 {
2336 return (__m128h)
2337 __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2338 (__v8hf) _mm_setzero_ph (),
2339 (__mmask8) -1,
2340 _MM_FROUND_CUR_DIRECTION);
2341 }
2342
2343 extern __inline __m128h
2344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B)2345 _mm_mask_getexp_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
2346 {
2347 return (__m128h)
2348 __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2349 (__v8hf) __W, (__mmask8) __U,
2350 _MM_FROUND_CUR_DIRECTION);
2351 }
2352
2353 extern __inline __m128h
2354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_sh(__mmask8 __U,__m128h __A,__m128h __B)2355 _mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B)
2356 {
2357 return (__m128h)
2358 __builtin_ia32_getexpsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2359 (__v8hf) _mm_setzero_ph (),
2360 (__mmask8) __U,
2361 _MM_FROUND_CUR_DIRECTION);
2362 }
2363
2364 extern __inline __m512h
2365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_ph(__m512h __A)2366 _mm512_getexp_ph (__m512h __A)
2367 {
2368 return (__m512h)
2369 __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2370 (__v32hf) _mm512_setzero_ph (),
2371 (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
2372 }
2373
2374 extern __inline __m512h
2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_ph(__m512h __W,__mmask32 __U,__m512h __A)2376 _mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
2377 {
2378 return (__m512h)
2379 __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
2380 (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
2381 }
2382
2383 extern __inline __m512h
2384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_ph(__mmask32 __U,__m512h __A)2385 _mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
2386 {
2387 return (__m512h)
2388 __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2389 (__v32hf) _mm512_setzero_ph (),
2390 (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
2391 }
2392
2393 #ifdef __OPTIMIZE__
2394 extern __inline __m128h
2395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_round_sh(__m128h __A,__m128h __B,const int __R)2396 _mm_getexp_round_sh (__m128h __A, __m128h __B, const int __R)
2397 {
2398 return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
2399 (__v8hf) __B,
2400 _mm_setzero_ph (),
2401 (__mmask8) -1,
2402 __R);
2403 }
2404
2405 extern __inline __m128h
2406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_round_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B,const int __R)2407 _mm_mask_getexp_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
2408 __m128h __B, const int __R)
2409 {
2410 return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
2411 (__v8hf) __B,
2412 (__v8hf) __W,
2413 (__mmask8) __U, __R);
2414 }
2415
2416 extern __inline __m128h
2417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_round_sh(__mmask8 __U,__m128h __A,__m128h __B,const int __R)2418 _mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
2419 const int __R)
2420 {
2421 return (__m128h) __builtin_ia32_getexpsh_mask_round ((__v8hf) __A,
2422 (__v8hf) __B,
2423 (__v8hf)
2424 _mm_setzero_ph (),
2425 (__mmask8) __U, __R);
2426 }
2427
2428 extern __inline __m512h
2429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_round_ph(__m512h __A,const int __R)2430 _mm512_getexp_round_ph (__m512h __A, const int __R)
2431 {
2432 return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2433 (__v32hf)
2434 _mm512_setzero_ph (),
2435 (__mmask32) -1, __R);
2436 }
2437
2438 extern __inline __m512h
2439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_round_ph(__m512h __W,__mmask32 __U,__m512h __A,const int __R)2440 _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
2441 const int __R)
2442 {
2443 return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2444 (__v32hf) __W,
2445 (__mmask32) __U, __R);
2446 }
2447
2448 extern __inline __m512h
2449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_round_ph(__mmask32 __U,__m512h __A,const int __R)2450 _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
2451 {
2452 return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
2453 (__v32hf)
2454 _mm512_setzero_ph (),
2455 (__mmask32) __U, __R);
2456 }
2457
2458 #else
2459 #define _mm_getexp_round_sh(A, B, R) \
2460 ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \
2461 (__v8hf)(__m128h)(B), \
2462 (__v8hf)_mm_setzero_ph(), \
2463 (__mmask8)-1, R))
2464
2465 #define _mm_mask_getexp_round_sh(W, U, A, B, C) \
2466 (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
2467
2468 #define _mm_maskz_getexp_round_sh(U, A, B, C) \
2469 (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, \
2470 (__v8hf)_mm_setzero_ph(), \
2471 U, C)
2472
2473 #define _mm512_getexp_round_ph(A, R) \
2474 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
2475 (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
2476
2477 #define _mm512_mask_getexp_round_ph(W, U, A, R) \
2478 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
2479 (__v32hf)(__m512h)(W), (__mmask32)(U), R))
2480
2481 #define _mm512_maskz_getexp_round_ph(U, A, R) \
2482 ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
2483 (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
2484
2485 #endif /* __OPTIMIZE__ */
2486
2487 /* Intrinsics vgetmantph, vgetmantsh. */
2488 #ifdef __OPTIMIZE__
2489 extern __inline __m128h
2490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_sh(__m128h __A,__m128h __B,_MM_MANTISSA_NORM_ENUM __C,_MM_MANTISSA_SIGN_ENUM __D)2491 _mm_getmant_sh (__m128h __A, __m128h __B,
2492 _MM_MANTISSA_NORM_ENUM __C,
2493 _MM_MANTISSA_SIGN_ENUM __D)
2494 {
2495 return (__m128h)
2496 __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2497 (__D << 2) | __C, _mm_setzero_ph (),
2498 (__mmask8) -1,
2499 _MM_FROUND_CUR_DIRECTION);
2500 }
2501
2502 extern __inline __m128h
2503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B,_MM_MANTISSA_NORM_ENUM __C,_MM_MANTISSA_SIGN_ENUM __D)2504 _mm_mask_getmant_sh (__m128h __W, __mmask8 __U, __m128h __A,
2505 __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
2506 _MM_MANTISSA_SIGN_ENUM __D)
2507 {
2508 return (__m128h)
2509 __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2510 (__D << 2) | __C, (__v8hf) __W,
2511 __U, _MM_FROUND_CUR_DIRECTION);
2512 }
2513
2514 extern __inline __m128h
2515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_sh(__mmask8 __U,__m128h __A,__m128h __B,_MM_MANTISSA_NORM_ENUM __C,_MM_MANTISSA_SIGN_ENUM __D)2516 _mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
2517 _MM_MANTISSA_NORM_ENUM __C,
2518 _MM_MANTISSA_SIGN_ENUM __D)
2519 {
2520 return (__m128h)
2521 __builtin_ia32_getmantsh_mask_round ((__v8hf) __A, (__v8hf) __B,
2522 (__D << 2) | __C,
2523 (__v8hf) _mm_setzero_ph(),
2524 __U, _MM_FROUND_CUR_DIRECTION);
2525 }
2526
2527 extern __inline __m512h
2528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_ph(__m512h __A,_MM_MANTISSA_NORM_ENUM __B,_MM_MANTISSA_SIGN_ENUM __C)2529 _mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
2530 _MM_MANTISSA_SIGN_ENUM __C)
2531 {
2532 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2533 (__C << 2) | __B,
2534 _mm512_setzero_ph (),
2535 (__mmask32) -1,
2536 _MM_FROUND_CUR_DIRECTION);
2537 }
2538
2539 extern __inline __m512h
2540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_ph(__m512h __W,__mmask32 __U,__m512h __A,_MM_MANTISSA_NORM_ENUM __B,_MM_MANTISSA_SIGN_ENUM __C)2541 _mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
2542 _MM_MANTISSA_NORM_ENUM __B,
2543 _MM_MANTISSA_SIGN_ENUM __C)
2544 {
2545 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2546 (__C << 2) | __B,
2547 (__v32hf) __W, __U,
2548 _MM_FROUND_CUR_DIRECTION);
2549 }
2550
2551 extern __inline __m512h
2552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_ph(__mmask32 __U,__m512h __A,_MM_MANTISSA_NORM_ENUM __B,_MM_MANTISSA_SIGN_ENUM __C)2553 _mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
2554 _MM_MANTISSA_NORM_ENUM __B,
2555 _MM_MANTISSA_SIGN_ENUM __C)
2556 {
2557 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2558 (__C << 2) | __B,
2559 (__v32hf)
2560 _mm512_setzero_ph (),
2561 __U,
2562 _MM_FROUND_CUR_DIRECTION);
2563 }
2564
2565 extern __inline __m128h
2566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_round_sh(__m128h __A,__m128h __B,_MM_MANTISSA_NORM_ENUM __C,_MM_MANTISSA_SIGN_ENUM __D,const int __R)2567 _mm_getmant_round_sh (__m128h __A, __m128h __B,
2568 _MM_MANTISSA_NORM_ENUM __C,
2569 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
2570 {
2571 return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
2572 (__v8hf) __B,
2573 (__D << 2) | __C,
2574 _mm_setzero_ph (),
2575 (__mmask8) -1,
2576 __R);
2577 }
2578
2579 extern __inline __m128h
2580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_round_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B,_MM_MANTISSA_NORM_ENUM __C,_MM_MANTISSA_SIGN_ENUM __D,const int __R)2581 _mm_mask_getmant_round_sh (__m128h __W, __mmask8 __U, __m128h __A,
2582 __m128h __B, _MM_MANTISSA_NORM_ENUM __C,
2583 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
2584 {
2585 return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
2586 (__v8hf) __B,
2587 (__D << 2) | __C,
2588 (__v8hf) __W,
2589 __U, __R);
2590 }
2591
2592 extern __inline __m128h
2593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_round_sh(__mmask8 __U,__m128h __A,__m128h __B,_MM_MANTISSA_NORM_ENUM __C,_MM_MANTISSA_SIGN_ENUM __D,const int __R)2594 _mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
2595 _MM_MANTISSA_NORM_ENUM __C,
2596 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
2597 {
2598 return (__m128h) __builtin_ia32_getmantsh_mask_round ((__v8hf) __A,
2599 (__v8hf) __B,
2600 (__D << 2) | __C,
2601 (__v8hf)
2602 _mm_setzero_ph(),
2603 __U, __R);
2604 }
2605
2606 extern __inline __m512h
2607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_round_ph(__m512h __A,_MM_MANTISSA_NORM_ENUM __B,_MM_MANTISSA_SIGN_ENUM __C,const int __R)2608 _mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
2609 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
2610 {
2611 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2612 (__C << 2) | __B,
2613 _mm512_setzero_ph (),
2614 (__mmask32) -1, __R);
2615 }
2616
2617 extern __inline __m512h
2618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_round_ph(__m512h __W,__mmask32 __U,__m512h __A,_MM_MANTISSA_NORM_ENUM __B,_MM_MANTISSA_SIGN_ENUM __C,const int __R)2619 _mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
2620 _MM_MANTISSA_NORM_ENUM __B,
2621 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
2622 {
2623 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2624 (__C << 2) | __B,
2625 (__v32hf) __W, __U,
2626 __R);
2627 }
2628
2629 extern __inline __m512h
2630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_round_ph(__mmask32 __U,__m512h __A,_MM_MANTISSA_NORM_ENUM __B,_MM_MANTISSA_SIGN_ENUM __C,const int __R)2631 _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
2632 _MM_MANTISSA_NORM_ENUM __B,
2633 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
2634 {
2635 return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
2636 (__C << 2) | __B,
2637 (__v32hf)
2638 _mm512_setzero_ph (),
2639 __U, __R);
2640 }
2641
2642 #else
2643 #define _mm512_getmant_ph(X, B, C) \
2644 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
2645 (int)(((C)<<2) | (B)), \
2646 (__v32hf)(__m512h) \
2647 _mm512_setzero_ph(), \
2648 (__mmask32)-1, \
2649 _MM_FROUND_CUR_DIRECTION))
2650
2651 #define _mm512_mask_getmant_ph(W, U, X, B, C) \
2652 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
2653 (int)(((C)<<2) | (B)), \
2654 (__v32hf)(__m512h)(W), \
2655 (__mmask32)(U), \
2656 _MM_FROUND_CUR_DIRECTION))
2657
2658
2659 #define _mm512_maskz_getmant_ph(U, X, B, C) \
2660 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
2661 (int)(((C)<<2) | (B)), \
2662 (__v32hf)(__m512h) \
2663 _mm512_setzero_ph(), \
2664 (__mmask32)(U), \
2665 _MM_FROUND_CUR_DIRECTION))
2666
2667 #define _mm_getmant_sh(X, Y, C, D) \
2668 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
2669 (__v8hf)(__m128h)(Y), \
2670 (int)(((D)<<2) | (C)), \
2671 (__v8hf)(__m128h) \
2672 _mm_setzero_ph (), \
2673 (__mmask8)-1, \
2674 _MM_FROUND_CUR_DIRECTION))
2675
2676 #define _mm_mask_getmant_sh(W, U, X, Y, C, D) \
2677 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
2678 (__v8hf)(__m128h)(Y), \
2679 (int)(((D)<<2) | (C)), \
2680 (__v8hf)(__m128h)(W), \
2681 (__mmask8)(U), \
2682 _MM_FROUND_CUR_DIRECTION))
2683
2684 #define _mm_maskz_getmant_sh(U, X, Y, C, D) \
2685 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
2686 (__v8hf)(__m128h)(Y), \
2687 (int)(((D)<<2) | (C)), \
2688 (__v8hf)(__m128h) \
2689 _mm_setzero_ph(), \
2690 (__mmask8)(U), \
2691 _MM_FROUND_CUR_DIRECTION))
2692
2693 #define _mm512_getmant_round_ph(X, B, C, R) \
2694 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
2695 (int)(((C)<<2) | (B)), \
2696 (__v32hf)(__m512h) \
2697 _mm512_setzero_ph(), \
2698 (__mmask32)-1, \
2699 (R)))
2700
2701 #define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) \
2702 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
2703 (int)(((C)<<2) | (B)), \
2704 (__v32hf)(__m512h)(W), \
2705 (__mmask32)(U), \
2706 (R)))
2707
2708
2709 #define _mm512_maskz_getmant_round_ph(U, X, B, C, R) \
2710 ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
2711 (int)(((C)<<2) | (B)), \
2712 (__v32hf)(__m512h) \
2713 _mm512_setzero_ph(), \
2714 (__mmask32)(U), \
2715 (R)))
2716
2717 #define _mm_getmant_round_sh(X, Y, C, D, R) \
2718 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
2719 (__v8hf)(__m128h)(Y), \
2720 (int)(((D)<<2) | (C)), \
2721 (__v8hf)(__m128h) \
2722 _mm_setzero_ph (), \
2723 (__mmask8)-1, \
2724 (R)))
2725
2726 #define _mm_mask_getmant_round_sh(W, U, X, Y, C, D, R) \
2727 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
2728 (__v8hf)(__m128h)(Y), \
2729 (int)(((D)<<2) | (C)), \
2730 (__v8hf)(__m128h)(W), \
2731 (__mmask8)(U), \
2732 (R)))
2733
2734 #define _mm_maskz_getmant_round_sh(U, X, Y, C, D, R) \
2735 ((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
2736 (__v8hf)(__m128h)(Y), \
2737 (int)(((D)<<2) | (C)), \
2738 (__v8hf)(__m128h) \
2739 _mm_setzero_ph(), \
2740 (__mmask8)(U), \
2741 (R)))
2742
2743 #endif /* __OPTIMIZE__ */
2744
2745 /* Intrinsics vmovw. */
2746 extern __inline __m128i
2747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi16_si128(short __A)2748 _mm_cvtsi16_si128 (short __A)
2749 {
2750 return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A);
2751 }
2752
2753 extern __inline short
2754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si16(__m128i __A)2755 _mm_cvtsi128_si16 (__m128i __A)
2756 {
2757 return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
2758 }
2759
2760 /* Intrinsics vmovsh. */
2761 extern __inline __m128h
2762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_load_sh(__m128h __A,__mmask8 __B,_Float16 const * __C)2763 _mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
2764 {
2765 return __builtin_ia32_loadsh_mask (__C, __A, __B);
2766 }
2767
2768 extern __inline __m128h
2769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_load_sh(__mmask8 __A,_Float16 const * __B)2770 _mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
2771 {
2772 return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
2773 }
2774
2775 extern __inline void
2776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_sh(_Float16 const * __A,__mmask8 __B,__m128h __C)2777 _mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C)
2778 {
2779 __builtin_ia32_storesh_mask (__A, __C, __B);
2780 }
2781
2782 extern __inline __m128h
2783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_sh(__m128h __A,__m128h __B)2784 _mm_move_sh (__m128h __A, __m128h __B)
2785 {
2786 __A[0] = __B[0];
2787 return __A;
2788 }
2789
2790 extern __inline __m128h
2791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_move_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)2792 _mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
2793 {
2794 return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
2795 }
2796
2797 extern __inline __m128h
2798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_move_sh(__mmask8 __A,__m128h __B,__m128h __C)2799 _mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
2800 {
2801 return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
2802 }
2803
2804 /* Intrinsics vcvtph2dq. */
2805 extern __inline __m512i
2806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_epi32(__m256h __A)2807 _mm512_cvtph_epi32 (__m256h __A)
2808 {
2809 return (__m512i)
2810 __builtin_ia32_vcvtph2dq512_mask_round (__A,
2811 (__v16si)
2812 _mm512_setzero_si512 (),
2813 (__mmask16) -1,
2814 _MM_FROUND_CUR_DIRECTION);
2815 }
2816
2817 extern __inline __m512i
2818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_epi32(__m512i __A,__mmask16 __B,__m256h __C)2819 _mm512_mask_cvtph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
2820 {
2821 return (__m512i)
2822 __builtin_ia32_vcvtph2dq512_mask_round (__C,
2823 (__v16si) __A,
2824 __B,
2825 _MM_FROUND_CUR_DIRECTION);
2826 }
2827
2828 extern __inline __m512i
2829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_epi32(__mmask16 __A,__m256h __B)2830 _mm512_maskz_cvtph_epi32 (__mmask16 __A, __m256h __B)
2831 {
2832 return (__m512i)
2833 __builtin_ia32_vcvtph2dq512_mask_round (__B,
2834 (__v16si)
2835 _mm512_setzero_si512 (),
2836 __A,
2837 _MM_FROUND_CUR_DIRECTION);
2838 }
2839
2840 #ifdef __OPTIMIZE__
2841 extern __inline __m512i
2842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_epi32(__m256h __A,int __B)2843 _mm512_cvt_roundph_epi32 (__m256h __A, int __B)
2844 {
2845 return (__m512i)
2846 __builtin_ia32_vcvtph2dq512_mask_round (__A,
2847 (__v16si)
2848 _mm512_setzero_si512 (),
2849 (__mmask16) -1,
2850 __B);
2851 }
2852
2853 extern __inline __m512i
2854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_epi32(__m512i __A,__mmask16 __B,__m256h __C,int __D)2855 _mm512_mask_cvt_roundph_epi32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
2856 {
2857 return (__m512i)
2858 __builtin_ia32_vcvtph2dq512_mask_round (__C,
2859 (__v16si) __A,
2860 __B,
2861 __D);
2862 }
2863
2864 extern __inline __m512i
2865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_epi32(__mmask16 __A,__m256h __B,int __C)2866 _mm512_maskz_cvt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
2867 {
2868 return (__m512i)
2869 __builtin_ia32_vcvtph2dq512_mask_round (__B,
2870 (__v16si)
2871 _mm512_setzero_si512 (),
2872 __A,
2873 __C);
2874 }
2875
2876 #else
2877 #define _mm512_cvt_roundph_epi32(A, B) \
2878 ((__m512i) \
2879 __builtin_ia32_vcvtph2dq512_mask_round ((A), \
2880 (__v16si) \
2881 _mm512_setzero_si512 (), \
2882 (__mmask16)-1, \
2883 (B)))
2884
2885 #define _mm512_mask_cvt_roundph_epi32(A, B, C, D) \
2886 ((__m512i) \
2887 __builtin_ia32_vcvtph2dq512_mask_round ((C), (__v16si)(A), (B), (D)))
2888
2889 #define _mm512_maskz_cvt_roundph_epi32(A, B, C) \
2890 ((__m512i) \
2891 __builtin_ia32_vcvtph2dq512_mask_round ((B), \
2892 (__v16si) \
2893 _mm512_setzero_si512 (), \
2894 (A), \
2895 (C)))
2896
2897 #endif /* __OPTIMIZE__ */
2898
2899 /* Intrinsics vcvtph2udq. */
2900 extern __inline __m512i
2901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_epu32(__m256h __A)2902 _mm512_cvtph_epu32 (__m256h __A)
2903 {
2904 return (__m512i)
2905 __builtin_ia32_vcvtph2udq512_mask_round (__A,
2906 (__v16si)
2907 _mm512_setzero_si512 (),
2908 (__mmask16) -1,
2909 _MM_FROUND_CUR_DIRECTION);
2910 }
2911
2912 extern __inline __m512i
2913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_epu32(__m512i __A,__mmask16 __B,__m256h __C)2914 _mm512_mask_cvtph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
2915 {
2916 return (__m512i)
2917 __builtin_ia32_vcvtph2udq512_mask_round (__C,
2918 (__v16si) __A,
2919 __B,
2920 _MM_FROUND_CUR_DIRECTION);
2921 }
2922
2923 extern __inline __m512i
2924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_epu32(__mmask16 __A,__m256h __B)2925 _mm512_maskz_cvtph_epu32 (__mmask16 __A, __m256h __B)
2926 {
2927 return (__m512i)
2928 __builtin_ia32_vcvtph2udq512_mask_round (__B,
2929 (__v16si)
2930 _mm512_setzero_si512 (),
2931 __A,
2932 _MM_FROUND_CUR_DIRECTION);
2933 }
2934
2935 #ifdef __OPTIMIZE__
2936 extern __inline __m512i
2937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_epu32(__m256h __A,int __B)2938 _mm512_cvt_roundph_epu32 (__m256h __A, int __B)
2939 {
2940 return (__m512i)
2941 __builtin_ia32_vcvtph2udq512_mask_round (__A,
2942 (__v16si)
2943 _mm512_setzero_si512 (),
2944 (__mmask16) -1,
2945 __B);
2946 }
2947
2948 extern __inline __m512i
2949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_epu32(__m512i __A,__mmask16 __B,__m256h __C,int __D)2950 _mm512_mask_cvt_roundph_epu32 (__m512i __A, __mmask16 __B, __m256h __C, int __D)
2951 {
2952 return (__m512i)
2953 __builtin_ia32_vcvtph2udq512_mask_round (__C,
2954 (__v16si) __A,
2955 __B,
2956 __D);
2957 }
2958
2959 extern __inline __m512i
2960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_epu32(__mmask16 __A,__m256h __B,int __C)2961 _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
2962 {
2963 return (__m512i)
2964 __builtin_ia32_vcvtph2udq512_mask_round (__B,
2965 (__v16si)
2966 _mm512_setzero_si512 (),
2967 __A,
2968 __C);
2969 }
2970
2971 #else
2972 #define _mm512_cvt_roundph_epu32(A, B) \
2973 ((__m512i) \
2974 __builtin_ia32_vcvtph2udq512_mask_round ((A), \
2975 (__v16si) \
2976 _mm512_setzero_si512 (), \
2977 (__mmask16)-1, \
2978 (B)))
2979
2980 #define _mm512_mask_cvt_roundph_epu32(A, B, C, D) \
2981 ((__m512i) \
2982 __builtin_ia32_vcvtph2udq512_mask_round ((C), (__v16si)(A), (B), (D)))
2983
2984 #define _mm512_maskz_cvt_roundph_epu32(A, B, C) \
2985 ((__m512i) \
2986 __builtin_ia32_vcvtph2udq512_mask_round ((B), \
2987 (__v16si) \
2988 _mm512_setzero_si512 (), \
2989 (A), \
2990 (C)))
2991
2992 #endif /* __OPTIMIZE__ */
2993
2994 /* Intrinsics vcvttph2dq. */
2995 extern __inline __m512i
2996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttph_epi32(__m256h __A)2997 _mm512_cvttph_epi32 (__m256h __A)
2998 {
2999 return (__m512i)
3000 __builtin_ia32_vcvttph2dq512_mask_round (__A,
3001 (__v16si)
3002 _mm512_setzero_si512 (),
3003 (__mmask16) -1,
3004 _MM_FROUND_CUR_DIRECTION);
3005 }
3006
3007 extern __inline __m512i
3008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttph_epi32(__m512i __A,__mmask16 __B,__m256h __C)3009 _mm512_mask_cvttph_epi32 (__m512i __A, __mmask16 __B, __m256h __C)
3010 {
3011 return (__m512i)
3012 __builtin_ia32_vcvttph2dq512_mask_round (__C,
3013 (__v16si) __A,
3014 __B,
3015 _MM_FROUND_CUR_DIRECTION);
3016 }
3017
3018 extern __inline __m512i
3019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttph_epi32(__mmask16 __A,__m256h __B)3020 _mm512_maskz_cvttph_epi32 (__mmask16 __A, __m256h __B)
3021 {
3022 return (__m512i)
3023 __builtin_ia32_vcvttph2dq512_mask_round (__B,
3024 (__v16si)
3025 _mm512_setzero_si512 (),
3026 __A,
3027 _MM_FROUND_CUR_DIRECTION);
3028 }
3029
3030 #ifdef __OPTIMIZE__
3031 extern __inline __m512i
3032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundph_epi32(__m256h __A,int __B)3033 _mm512_cvtt_roundph_epi32 (__m256h __A, int __B)
3034 {
3035 return (__m512i)
3036 __builtin_ia32_vcvttph2dq512_mask_round (__A,
3037 (__v16si)
3038 _mm512_setzero_si512 (),
3039 (__mmask16) -1,
3040 __B);
3041 }
3042
3043 extern __inline __m512i
3044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundph_epi32(__m512i __A,__mmask16 __B,__m256h __C,int __D)3045 _mm512_mask_cvtt_roundph_epi32 (__m512i __A, __mmask16 __B,
3046 __m256h __C, int __D)
3047 {
3048 return (__m512i)
3049 __builtin_ia32_vcvttph2dq512_mask_round (__C,
3050 (__v16si) __A,
3051 __B,
3052 __D);
3053 }
3054
3055 extern __inline __m512i
3056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundph_epi32(__mmask16 __A,__m256h __B,int __C)3057 _mm512_maskz_cvtt_roundph_epi32 (__mmask16 __A, __m256h __B, int __C)
3058 {
3059 return (__m512i)
3060 __builtin_ia32_vcvttph2dq512_mask_round (__B,
3061 (__v16si)
3062 _mm512_setzero_si512 (),
3063 __A,
3064 __C);
3065 }
3066
3067 #else
3068 #define _mm512_cvtt_roundph_epi32(A, B) \
3069 ((__m512i) \
3070 __builtin_ia32_vcvttph2dq512_mask_round ((A), \
3071 (__v16si) \
3072 (_mm512_setzero_si512 ()), \
3073 (__mmask16)(-1), (B)))
3074
3075 #define _mm512_mask_cvtt_roundph_epi32(A, B, C, D) \
3076 ((__m512i) \
3077 __builtin_ia32_vcvttph2dq512_mask_round ((C), \
3078 (__v16si)(A), \
3079 (B), \
3080 (D)))
3081
3082 #define _mm512_maskz_cvtt_roundph_epi32(A, B, C) \
3083 ((__m512i) \
3084 __builtin_ia32_vcvttph2dq512_mask_round ((B), \
3085 (__v16si) \
3086 _mm512_setzero_si512 (), \
3087 (A), \
3088 (C)))
3089
3090 #endif /* __OPTIMIZE__ */
3091
3092 /* Intrinsics vcvttph2udq. */
3093 extern __inline __m512i
3094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttph_epu32(__m256h __A)3095 _mm512_cvttph_epu32 (__m256h __A)
3096 {
3097 return (__m512i)
3098 __builtin_ia32_vcvttph2udq512_mask_round (__A,
3099 (__v16si)
3100 _mm512_setzero_si512 (),
3101 (__mmask16) -1,
3102 _MM_FROUND_CUR_DIRECTION);
3103 }
3104
3105 extern __inline __m512i
3106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttph_epu32(__m512i __A,__mmask16 __B,__m256h __C)3107 _mm512_mask_cvttph_epu32 (__m512i __A, __mmask16 __B, __m256h __C)
3108 {
3109 return (__m512i)
3110 __builtin_ia32_vcvttph2udq512_mask_round (__C,
3111 (__v16si) __A,
3112 __B,
3113 _MM_FROUND_CUR_DIRECTION);
3114 }
3115
3116 extern __inline __m512i
3117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttph_epu32(__mmask16 __A,__m256h __B)3118 _mm512_maskz_cvttph_epu32 (__mmask16 __A, __m256h __B)
3119 {
3120 return (__m512i)
3121 __builtin_ia32_vcvttph2udq512_mask_round (__B,
3122 (__v16si)
3123 _mm512_setzero_si512 (),
3124 __A,
3125 _MM_FROUND_CUR_DIRECTION);
3126 }
3127
3128 #ifdef __OPTIMIZE__
3129 extern __inline __m512i
3130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundph_epu32(__m256h __A,int __B)3131 _mm512_cvtt_roundph_epu32 (__m256h __A, int __B)
3132 {
3133 return (__m512i)
3134 __builtin_ia32_vcvttph2udq512_mask_round (__A,
3135 (__v16si)
3136 _mm512_setzero_si512 (),
3137 (__mmask16) -1,
3138 __B);
3139 }
3140
3141 extern __inline __m512i
3142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundph_epu32(__m512i __A,__mmask16 __B,__m256h __C,int __D)3143 _mm512_mask_cvtt_roundph_epu32 (__m512i __A, __mmask16 __B,
3144 __m256h __C, int __D)
3145 {
3146 return (__m512i)
3147 __builtin_ia32_vcvttph2udq512_mask_round (__C,
3148 (__v16si) __A,
3149 __B,
3150 __D);
3151 }
3152
3153 extern __inline __m512i
3154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundph_epu32(__mmask16 __A,__m256h __B,int __C)3155 _mm512_maskz_cvtt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
3156 {
3157 return (__m512i)
3158 __builtin_ia32_vcvttph2udq512_mask_round (__B,
3159 (__v16si)
3160 _mm512_setzero_si512 (),
3161 __A,
3162 __C);
3163 }
3164
3165 #else
3166 #define _mm512_cvtt_roundph_epu32(A, B) \
3167 ((__m512i) \
3168 __builtin_ia32_vcvttph2udq512_mask_round ((A), \
3169 (__v16si) \
3170 _mm512_setzero_si512 (), \
3171 (__mmask16)-1, \
3172 (B)))
3173
3174 #define _mm512_mask_cvtt_roundph_epu32(A, B, C, D) \
3175 ((__m512i) \
3176 __builtin_ia32_vcvttph2udq512_mask_round ((C), \
3177 (__v16si)(A), \
3178 (B), \
3179 (D)))
3180
3181 #define _mm512_maskz_cvtt_roundph_epu32(A, B, C) \
3182 ((__m512i) \
3183 __builtin_ia32_vcvttph2udq512_mask_round ((B), \
3184 (__v16si) \
3185 _mm512_setzero_si512 (), \
3186 (A), \
3187 (C)))
3188
3189 #endif /* __OPTIMIZE__ */
3190
3191 /* Intrinsics vcvtdq2ph. */
3192 extern __inline __m256h
3193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_ph(__m512i __A)3194 _mm512_cvtepi32_ph (__m512i __A)
3195 {
3196 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
3197 _mm256_setzero_ph (),
3198 (__mmask16) -1,
3199 _MM_FROUND_CUR_DIRECTION);
3200 }
3201
3202 extern __inline __m256h
3203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_ph(__m256h __A,__mmask16 __B,__m512i __C)3204 _mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C)
3205 {
3206 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
3207 __A,
3208 __B,
3209 _MM_FROUND_CUR_DIRECTION);
3210 }
3211
3212 extern __inline __m256h
3213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi32_ph(__mmask16 __A,__m512i __B)3214 _mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B)
3215 {
3216 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
3217 _mm256_setzero_ph (),
3218 __A,
3219 _MM_FROUND_CUR_DIRECTION);
3220 }
3221
3222 #ifdef __OPTIMIZE__
3223 extern __inline __m256h
3224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi32_ph(__m512i __A,int __B)3225 _mm512_cvt_roundepi32_ph (__m512i __A, int __B)
3226 {
3227 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __A,
3228 _mm256_setzero_ph (),
3229 (__mmask16) -1,
3230 __B);
3231 }
3232
3233 extern __inline __m256h
3234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi32_ph(__m256h __A,__mmask16 __B,__m512i __C,int __D)3235 _mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
3236 {
3237 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __C,
3238 __A,
3239 __B,
3240 __D);
3241 }
3242
3243 extern __inline __m256h
3244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi32_ph(__mmask16 __A,__m512i __B,int __C)3245 _mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C)
3246 {
3247 return __builtin_ia32_vcvtdq2ph512_mask_round ((__v16si) __B,
3248 _mm256_setzero_ph (),
3249 __A,
3250 __C);
3251 }
3252
3253 #else
3254 #define _mm512_cvt_roundepi32_ph(A, B) \
3255 (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(A), \
3256 _mm256_setzero_ph (), \
3257 (__mmask16)-1, \
3258 (B)))
3259
3260 #define _mm512_mask_cvt_roundepi32_ph(A, B, C, D) \
3261 (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(C), \
3262 (A), \
3263 (B), \
3264 (D)))
3265
3266 #define _mm512_maskz_cvt_roundepi32_ph(A, B, C) \
3267 (__builtin_ia32_vcvtdq2ph512_mask_round ((__v16si)(B), \
3268 _mm256_setzero_ph (), \
3269 (A), \
3270 (C)))
3271
3272 #endif /* __OPTIMIZE__ */
3273
3274 /* Intrinsics vcvtudq2ph. */
3275 extern __inline __m256h
3276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu32_ph(__m512i __A)3277 _mm512_cvtepu32_ph (__m512i __A)
3278 {
3279 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
3280 _mm256_setzero_ph (),
3281 (__mmask16) -1,
3282 _MM_FROUND_CUR_DIRECTION);
3283 }
3284
3285 extern __inline __m256h
3286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu32_ph(__m256h __A,__mmask16 __B,__m512i __C)3287 _mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C)
3288 {
3289 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
3290 __A,
3291 __B,
3292 _MM_FROUND_CUR_DIRECTION);
3293 }
3294
3295 extern __inline __m256h
3296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu32_ph(__mmask16 __A,__m512i __B)3297 _mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B)
3298 {
3299 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
3300 _mm256_setzero_ph (),
3301 __A,
3302 _MM_FROUND_CUR_DIRECTION);
3303 }
3304
3305 #ifdef __OPTIMIZE__
3306 extern __inline __m256h
3307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu32_ph(__m512i __A,int __B)3308 _mm512_cvt_roundepu32_ph (__m512i __A, int __B)
3309 {
3310 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __A,
3311 _mm256_setzero_ph (),
3312 (__mmask16) -1,
3313 __B);
3314 }
3315
3316 extern __inline __m256h
3317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu32_ph(__m256h __A,__mmask16 __B,__m512i __C,int __D)3318 _mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
3319 {
3320 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __C,
3321 __A,
3322 __B,
3323 __D);
3324 }
3325
3326 extern __inline __m256h
3327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu32_ph(__mmask16 __A,__m512i __B,int __C)3328 _mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C)
3329 {
3330 return __builtin_ia32_vcvtudq2ph512_mask_round ((__v16si) __B,
3331 _mm256_setzero_ph (),
3332 __A,
3333 __C);
3334 }
3335
3336 #else
3337 #define _mm512_cvt_roundepu32_ph(A, B) \
3338 (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)(A), \
3339 _mm256_setzero_ph (), \
3340 (__mmask16)-1, \
3341 B))
3342
3343 #define _mm512_mask_cvt_roundepu32_ph(A, B, C, D) \
3344 (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)C, \
3345 A, \
3346 B, \
3347 D))
3348
3349 #define _mm512_maskz_cvt_roundepu32_ph(A, B, C) \
3350 (__builtin_ia32_vcvtudq2ph512_mask_round ((__v16si)B, \
3351 _mm256_setzero_ph (), \
3352 A, \
3353 C))
3354
3355 #endif /* __OPTIMIZE__ */
3356
3357 /* Intrinsics vcvtph2qq. */
3358 extern __inline __m512i
3359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_epi64(__m128h __A)3360 _mm512_cvtph_epi64 (__m128h __A)
3361 {
3362 return __builtin_ia32_vcvtph2qq512_mask_round (__A,
3363 _mm512_setzero_si512 (),
3364 (__mmask8) -1,
3365 _MM_FROUND_CUR_DIRECTION);
3366 }
3367
3368 extern __inline __m512i
3369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_epi64(__m512i __A,__mmask8 __B,__m128h __C)3370 _mm512_mask_cvtph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
3371 {
3372 return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B,
3373 _MM_FROUND_CUR_DIRECTION);
3374 }
3375
3376 extern __inline __m512i
3377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_epi64(__mmask8 __A,__m128h __B)3378 _mm512_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
3379 {
3380 return __builtin_ia32_vcvtph2qq512_mask_round (__B,
3381 _mm512_setzero_si512 (),
3382 __A,
3383 _MM_FROUND_CUR_DIRECTION);
3384 }
3385
3386 #ifdef __OPTIMIZE__
3387 extern __inline __m512i
3388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_epi64(__m128h __A,int __B)3389 _mm512_cvt_roundph_epi64 (__m128h __A, int __B)
3390 {
3391 return __builtin_ia32_vcvtph2qq512_mask_round (__A,
3392 _mm512_setzero_si512 (),
3393 (__mmask8) -1,
3394 __B);
3395 }
3396
3397 extern __inline __m512i
3398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_epi64(__m512i __A,__mmask8 __B,__m128h __C,int __D)3399 _mm512_mask_cvt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3400 {
3401 return __builtin_ia32_vcvtph2qq512_mask_round (__C, __A, __B, __D);
3402 }
3403
3404 extern __inline __m512i
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_epi64(__mmask8 __A,__m128h __B,int __C)3406 _mm512_maskz_cvt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
3407 {
3408 return __builtin_ia32_vcvtph2qq512_mask_round (__B,
3409 _mm512_setzero_si512 (),
3410 __A,
3411 __C);
3412 }
3413
3414 #else
3415 #define _mm512_cvt_roundph_epi64(A, B) \
3416 (__builtin_ia32_vcvtph2qq512_mask_round ((A), \
3417 _mm512_setzero_si512 (), \
3418 (__mmask8)-1, \
3419 (B)))
3420
3421 #define _mm512_mask_cvt_roundph_epi64(A, B, C, D) \
3422 (__builtin_ia32_vcvtph2qq512_mask_round ((C), (A), (B), (D)))
3423
3424 #define _mm512_maskz_cvt_roundph_epi64(A, B, C) \
3425 (__builtin_ia32_vcvtph2qq512_mask_round ((B), \
3426 _mm512_setzero_si512 (), \
3427 (A), \
3428 (C)))
3429
3430 #endif /* __OPTIMIZE__ */
3431
3432 /* Intrinsics vcvtph2uqq. */
3433 extern __inline __m512i
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_epu64(__m128h __A)3435 _mm512_cvtph_epu64 (__m128h __A)
3436 {
3437 return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
3438 _mm512_setzero_si512 (),
3439 (__mmask8) -1,
3440 _MM_FROUND_CUR_DIRECTION);
3441 }
3442
3443 extern __inline __m512i
3444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_epu64(__m512i __A,__mmask8 __B,__m128h __C)3445 _mm512_mask_cvtph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
3446 {
3447 return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B,
3448 _MM_FROUND_CUR_DIRECTION);
3449 }
3450
3451 extern __inline __m512i
3452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_epu64(__mmask8 __A,__m128h __B)3453 _mm512_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
3454 {
3455 return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
3456 _mm512_setzero_si512 (),
3457 __A,
3458 _MM_FROUND_CUR_DIRECTION);
3459 }
3460
3461 #ifdef __OPTIMIZE__
3462
3463 extern __inline __m512i
3464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_epu64(__m128h __A,int __B)3465 _mm512_cvt_roundph_epu64 (__m128h __A, int __B)
3466 {
3467 return __builtin_ia32_vcvtph2uqq512_mask_round (__A,
3468 _mm512_setzero_si512 (),
3469 (__mmask8) -1,
3470 __B);
3471 }
3472
3473 extern __inline __m512i
3474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_epu64(__m512i __A,__mmask8 __B,__m128h __C,int __D)3475 _mm512_mask_cvt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3476 {
3477 return __builtin_ia32_vcvtph2uqq512_mask_round (__C, __A, __B, __D);
3478 }
3479
3480 extern __inline __m512i
3481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_epu64(__mmask8 __A,__m128h __B,int __C)3482 _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
3483 {
3484 return __builtin_ia32_vcvtph2uqq512_mask_round (__B,
3485 _mm512_setzero_si512 (),
3486 __A,
3487 __C);
3488 }
3489
3490 #else
3491 #define _mm512_cvt_roundph_epu64(A, B) \
3492 (__builtin_ia32_vcvtph2uqq512_mask_round ((A), \
3493 _mm512_setzero_si512 (), \
3494 (__mmask8)-1, \
3495 (B)))
3496
3497 #define _mm512_mask_cvt_roundph_epu64(A, B, C, D) \
3498 (__builtin_ia32_vcvtph2uqq512_mask_round ((C), (A), (B), (D)))
3499
3500 #define _mm512_maskz_cvt_roundph_epu64(A, B, C) \
3501 (__builtin_ia32_vcvtph2uqq512_mask_round ((B), \
3502 _mm512_setzero_si512 (), \
3503 (A), \
3504 (C)))
3505
3506 #endif /* __OPTIMIZE__ */
3507
3508 /* Intrinsics vcvttph2qq. */
3509 extern __inline __m512i
3510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttph_epi64(__m128h __A)3511 _mm512_cvttph_epi64 (__m128h __A)
3512 {
3513 return __builtin_ia32_vcvttph2qq512_mask_round (__A,
3514 _mm512_setzero_si512 (),
3515 (__mmask8) -1,
3516 _MM_FROUND_CUR_DIRECTION);
3517 }
3518
3519 extern __inline __m512i
3520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttph_epi64(__m512i __A,__mmask8 __B,__m128h __C)3521 _mm512_mask_cvttph_epi64 (__m512i __A, __mmask8 __B, __m128h __C)
3522 {
3523 return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B,
3524 _MM_FROUND_CUR_DIRECTION);
3525 }
3526
3527 extern __inline __m512i
3528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttph_epi64(__mmask8 __A,__m128h __B)3529 _mm512_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
3530 {
3531 return __builtin_ia32_vcvttph2qq512_mask_round (__B,
3532 _mm512_setzero_si512 (),
3533 __A,
3534 _MM_FROUND_CUR_DIRECTION);
3535 }
3536
3537 #ifdef __OPTIMIZE__
3538 extern __inline __m512i
3539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundph_epi64(__m128h __A,int __B)3540 _mm512_cvtt_roundph_epi64 (__m128h __A, int __B)
3541 {
3542 return __builtin_ia32_vcvttph2qq512_mask_round (__A,
3543 _mm512_setzero_si512 (),
3544 (__mmask8) -1,
3545 __B);
3546 }
3547
3548 extern __inline __m512i
3549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundph_epi64(__m512i __A,__mmask8 __B,__m128h __C,int __D)3550 _mm512_mask_cvtt_roundph_epi64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3551 {
3552 return __builtin_ia32_vcvttph2qq512_mask_round (__C, __A, __B, __D);
3553 }
3554
3555 extern __inline __m512i
3556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundph_epi64(__mmask8 __A,__m128h __B,int __C)3557 _mm512_maskz_cvtt_roundph_epi64 (__mmask8 __A, __m128h __B, int __C)
3558 {
3559 return __builtin_ia32_vcvttph2qq512_mask_round (__B,
3560 _mm512_setzero_si512 (),
3561 __A,
3562 __C);
3563 }
3564
3565 #else
3566 #define _mm512_cvtt_roundph_epi64(A, B) \
3567 (__builtin_ia32_vcvttph2qq512_mask_round ((A), \
3568 _mm512_setzero_si512 (), \
3569 (__mmask8)-1, \
3570 (B)))
3571
3572 #define _mm512_mask_cvtt_roundph_epi64(A, B, C, D) \
3573 __builtin_ia32_vcvttph2qq512_mask_round ((C), (A), (B), (D))
3574
3575 #define _mm512_maskz_cvtt_roundph_epi64(A, B, C) \
3576 (__builtin_ia32_vcvttph2qq512_mask_round ((B), \
3577 _mm512_setzero_si512 (), \
3578 (A), \
3579 (C)))
3580
3581 #endif /* __OPTIMIZE__ */
3582
3583 /* Intrinsics vcvttph2uqq. */
3584 extern __inline __m512i
3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttph_epu64(__m128h __A)3586 _mm512_cvttph_epu64 (__m128h __A)
3587 {
3588 return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
3589 _mm512_setzero_si512 (),
3590 (__mmask8) -1,
3591 _MM_FROUND_CUR_DIRECTION);
3592 }
3593
3594 extern __inline __m512i
3595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttph_epu64(__m512i __A,__mmask8 __B,__m128h __C)3596 _mm512_mask_cvttph_epu64 (__m512i __A, __mmask8 __B, __m128h __C)
3597 {
3598 return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B,
3599 _MM_FROUND_CUR_DIRECTION);
3600 }
3601
3602 extern __inline __m512i
3603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttph_epu64(__mmask8 __A,__m128h __B)3604 _mm512_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
3605 {
3606 return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
3607 _mm512_setzero_si512 (),
3608 __A,
3609 _MM_FROUND_CUR_DIRECTION);
3610 }
3611
3612 #ifdef __OPTIMIZE__
3613 extern __inline __m512i
3614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundph_epu64(__m128h __A,int __B)3615 _mm512_cvtt_roundph_epu64 (__m128h __A, int __B)
3616 {
3617 return __builtin_ia32_vcvttph2uqq512_mask_round (__A,
3618 _mm512_setzero_si512 (),
3619 (__mmask8) -1,
3620 __B);
3621 }
3622
3623 extern __inline __m512i
3624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundph_epu64(__m512i __A,__mmask8 __B,__m128h __C,int __D)3625 _mm512_mask_cvtt_roundph_epu64 (__m512i __A, __mmask8 __B, __m128h __C, int __D)
3626 {
3627 return __builtin_ia32_vcvttph2uqq512_mask_round (__C, __A, __B, __D);
3628 }
3629
3630 extern __inline __m512i
3631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundph_epu64(__mmask8 __A,__m128h __B,int __C)3632 _mm512_maskz_cvtt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
3633 {
3634 return __builtin_ia32_vcvttph2uqq512_mask_round (__B,
3635 _mm512_setzero_si512 (),
3636 __A,
3637 __C);
3638 }
3639
3640 #else
3641 #define _mm512_cvtt_roundph_epu64(A, B) \
3642 (__builtin_ia32_vcvttph2uqq512_mask_round ((A), \
3643 _mm512_setzero_si512 (), \
3644 (__mmask8)-1, \
3645 (B)))
3646
3647 #define _mm512_mask_cvtt_roundph_epu64(A, B, C, D) \
3648 __builtin_ia32_vcvttph2uqq512_mask_round ((C), (A), (B), (D))
3649
3650 #define _mm512_maskz_cvtt_roundph_epu64(A, B, C) \
3651 (__builtin_ia32_vcvttph2uqq512_mask_round ((B), \
3652 _mm512_setzero_si512 (), \
3653 (A), \
3654 (C)))
3655
3656 #endif /* __OPTIMIZE__ */
3657
3658 /* Intrinsics vcvtqq2ph. */
3659 extern __inline __m128h
3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_ph(__m512i __A)3661 _mm512_cvtepi64_ph (__m512i __A)
3662 {
3663 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
3664 _mm_setzero_ph (),
3665 (__mmask8) -1,
3666 _MM_FROUND_CUR_DIRECTION);
3667 }
3668
3669 extern __inline __m128h
3670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_ph(__m128h __A,__mmask8 __B,__m512i __C)3671 _mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C)
3672 {
3673 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
3674 __A,
3675 __B,
3676 _MM_FROUND_CUR_DIRECTION);
3677 }
3678
3679 extern __inline __m128h
3680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi64_ph(__mmask8 __A,__m512i __B)3681 _mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B)
3682 {
3683 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
3684 _mm_setzero_ph (),
3685 __A,
3686 _MM_FROUND_CUR_DIRECTION);
3687 }
3688
3689 #ifdef __OPTIMIZE__
3690 extern __inline __m128h
3691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi64_ph(__m512i __A,int __B)3692 _mm512_cvt_roundepi64_ph (__m512i __A, int __B)
3693 {
3694 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __A,
3695 _mm_setzero_ph (),
3696 (__mmask8) -1,
3697 __B);
3698 }
3699
3700 extern __inline __m128h
3701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi64_ph(__m128h __A,__mmask8 __B,__m512i __C,int __D)3702 _mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
3703 {
3704 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __C,
3705 __A,
3706 __B,
3707 __D);
3708 }
3709
3710 extern __inline __m128h
3711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi64_ph(__mmask8 __A,__m512i __B,int __C)3712 _mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C)
3713 {
3714 return __builtin_ia32_vcvtqq2ph512_mask_round ((__v8di) __B,
3715 _mm_setzero_ph (),
3716 __A,
3717 __C);
3718 }
3719
3720 #else
3721 #define _mm512_cvt_roundepi64_ph(A, B) \
3722 (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(A), \
3723 _mm_setzero_ph (), \
3724 (__mmask8)-1, \
3725 (B)))
3726
3727 #define _mm512_mask_cvt_roundepi64_ph(A, B, C, D) \
3728 (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
3729
3730 #define _mm512_maskz_cvt_roundepi64_ph(A, B, C) \
3731 (__builtin_ia32_vcvtqq2ph512_mask_round ((__v8di)(B), \
3732 _mm_setzero_ph (), \
3733 (A), \
3734 (C)))
3735
3736 #endif /* __OPTIMIZE__ */
3737
3738 /* Intrinsics vcvtuqq2ph. */
3739 extern __inline __m128h
3740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu64_ph(__m512i __A)3741 _mm512_cvtepu64_ph (__m512i __A)
3742 {
3743 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
3744 _mm_setzero_ph (),
3745 (__mmask8) -1,
3746 _MM_FROUND_CUR_DIRECTION);
3747 }
3748
3749 extern __inline __m128h
3750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu64_ph(__m128h __A,__mmask8 __B,__m512i __C)3751 _mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C)
3752 {
3753 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
3754 __A,
3755 __B,
3756 _MM_FROUND_CUR_DIRECTION);
3757 }
3758
3759 extern __inline __m128h
3760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu64_ph(__mmask8 __A,__m512i __B)3761 _mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B)
3762 {
3763 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
3764 _mm_setzero_ph (),
3765 __A,
3766 _MM_FROUND_CUR_DIRECTION);
3767 }
3768
3769 #ifdef __OPTIMIZE__
3770 extern __inline __m128h
3771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu64_ph(__m512i __A,int __B)3772 _mm512_cvt_roundepu64_ph (__m512i __A, int __B)
3773 {
3774 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __A,
3775 _mm_setzero_ph (),
3776 (__mmask8) -1,
3777 __B);
3778 }
3779
3780 extern __inline __m128h
3781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu64_ph(__m128h __A,__mmask8 __B,__m512i __C,int __D)3782 _mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
3783 {
3784 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __C,
3785 __A,
3786 __B,
3787 __D);
3788 }
3789
3790 extern __inline __m128h
3791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu64_ph(__mmask8 __A,__m512i __B,int __C)3792 _mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C)
3793 {
3794 return __builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di) __B,
3795 _mm_setzero_ph (),
3796 __A,
3797 __C);
3798 }
3799
3800 #else
3801 #define _mm512_cvt_roundepu64_ph(A, B) \
3802 (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(A), \
3803 _mm_setzero_ph (), \
3804 (__mmask8)-1, \
3805 (B)))
3806
3807 #define _mm512_mask_cvt_roundepu64_ph(A, B, C, D) \
3808 (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(C), (A), (B), (D)))
3809
3810 #define _mm512_maskz_cvt_roundepu64_ph(A, B, C) \
3811 (__builtin_ia32_vcvtuqq2ph512_mask_round ((__v8di)(B), \
3812 _mm_setzero_ph (), \
3813 (A), \
3814 (C)))
3815
3816 #endif /* __OPTIMIZE__ */
3817
3818 /* Intrinsics vcvtph2w. */
3819 extern __inline __m512i
3820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_epi16(__m512h __A)3821 _mm512_cvtph_epi16 (__m512h __A)
3822 {
3823 return (__m512i)
3824 __builtin_ia32_vcvtph2w512_mask_round (__A,
3825 (__v32hi)
3826 _mm512_setzero_si512 (),
3827 (__mmask32) -1,
3828 _MM_FROUND_CUR_DIRECTION);
3829 }
3830
3831 extern __inline __m512i
3832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_epi16(__m512i __A,__mmask32 __B,__m512h __C)3833 _mm512_mask_cvtph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
3834 {
3835 return (__m512i)
3836 __builtin_ia32_vcvtph2w512_mask_round (__C,
3837 (__v32hi) __A,
3838 __B,
3839 _MM_FROUND_CUR_DIRECTION);
3840 }
3841
3842 extern __inline __m512i
3843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_epi16(__mmask32 __A,__m512h __B)3844 _mm512_maskz_cvtph_epi16 (__mmask32 __A, __m512h __B)
3845 {
3846 return (__m512i)
3847 __builtin_ia32_vcvtph2w512_mask_round (__B,
3848 (__v32hi)
3849 _mm512_setzero_si512 (),
3850 __A,
3851 _MM_FROUND_CUR_DIRECTION);
3852 }
3853
3854 #ifdef __OPTIMIZE__
3855 extern __inline __m512i
3856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_epi16(__m512h __A,int __B)3857 _mm512_cvt_roundph_epi16 (__m512h __A, int __B)
3858 {
3859 return (__m512i)
3860 __builtin_ia32_vcvtph2w512_mask_round (__A,
3861 (__v32hi)
3862 _mm512_setzero_si512 (),
3863 (__mmask32) -1,
3864 __B);
3865 }
3866
3867 extern __inline __m512i
3868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_epi16(__m512i __A,__mmask32 __B,__m512h __C,int __D)3869 _mm512_mask_cvt_roundph_epi16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
3870 {
3871 return (__m512i)
3872 __builtin_ia32_vcvtph2w512_mask_round (__C,
3873 (__v32hi) __A,
3874 __B,
3875 __D);
3876 }
3877
3878 extern __inline __m512i
3879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_epi16(__mmask32 __A,__m512h __B,int __C)3880 _mm512_maskz_cvt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
3881 {
3882 return (__m512i)
3883 __builtin_ia32_vcvtph2w512_mask_round (__B,
3884 (__v32hi)
3885 _mm512_setzero_si512 (),
3886 __A,
3887 __C);
3888 }
3889
3890 #else
3891 #define _mm512_cvt_roundph_epi16(A, B) \
3892 ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((A), \
3893 (__v32hi) \
3894 _mm512_setzero_si512 (), \
3895 (__mmask32)-1, \
3896 (B)))
3897
3898 #define _mm512_mask_cvt_roundph_epi16(A, B, C, D) \
3899 ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((C), \
3900 (__v32hi)(A), \
3901 (B), \
3902 (D)))
3903
3904 #define _mm512_maskz_cvt_roundph_epi16(A, B, C) \
3905 ((__m512i)__builtin_ia32_vcvtph2w512_mask_round ((B), \
3906 (__v32hi) \
3907 _mm512_setzero_si512 (), \
3908 (A), \
3909 (C)))
3910
3911 #endif /* __OPTIMIZE__ */
3912
3913 /* Intrinsics vcvtph2uw. */
3914 extern __inline __m512i
3915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_epu16(__m512h __A)3916 _mm512_cvtph_epu16 (__m512h __A)
3917 {
3918 return (__m512i)
3919 __builtin_ia32_vcvtph2uw512_mask_round (__A,
3920 (__v32hi)
3921 _mm512_setzero_si512 (),
3922 (__mmask32) -1,
3923 _MM_FROUND_CUR_DIRECTION);
3924 }
3925
3926 extern __inline __m512i
3927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_epu16(__m512i __A,__mmask32 __B,__m512h __C)3928 _mm512_mask_cvtph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
3929 {
3930 return (__m512i)
3931 __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B,
3932 _MM_FROUND_CUR_DIRECTION);
3933 }
3934
3935 extern __inline __m512i
3936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_epu16(__mmask32 __A,__m512h __B)3937 _mm512_maskz_cvtph_epu16 (__mmask32 __A, __m512h __B)
3938 {
3939 return (__m512i)
3940 __builtin_ia32_vcvtph2uw512_mask_round (__B,
3941 (__v32hi)
3942 _mm512_setzero_si512 (),
3943 __A,
3944 _MM_FROUND_CUR_DIRECTION);
3945 }
3946
3947 #ifdef __OPTIMIZE__
3948 extern __inline __m512i
3949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_epu16(__m512h __A,int __B)3950 _mm512_cvt_roundph_epu16 (__m512h __A, int __B)
3951 {
3952 return (__m512i)
3953 __builtin_ia32_vcvtph2uw512_mask_round (__A,
3954 (__v32hi)
3955 _mm512_setzero_si512 (),
3956 (__mmask32) -1,
3957 __B);
3958 }
3959
3960 extern __inline __m512i
3961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_epu16(__m512i __A,__mmask32 __B,__m512h __C,int __D)3962 _mm512_mask_cvt_roundph_epu16 (__m512i __A, __mmask32 __B, __m512h __C, int __D)
3963 {
3964 return (__m512i)
3965 __builtin_ia32_vcvtph2uw512_mask_round (__C, (__v32hi) __A, __B, __D);
3966 }
3967
3968 extern __inline __m512i
3969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_epu16(__mmask32 __A,__m512h __B,int __C)3970 _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
3971 {
3972 return (__m512i)
3973 __builtin_ia32_vcvtph2uw512_mask_round (__B,
3974 (__v32hi)
3975 _mm512_setzero_si512 (),
3976 __A,
3977 __C);
3978 }
3979
3980 #else
3981 #define _mm512_cvt_roundph_epu16(A, B) \
3982 ((__m512i) \
3983 __builtin_ia32_vcvtph2uw512_mask_round ((A), \
3984 (__v32hi) \
3985 _mm512_setzero_si512 (), \
3986 (__mmask32)-1, (B)))
3987
3988 #define _mm512_mask_cvt_roundph_epu16(A, B, C, D) \
3989 ((__m512i) \
3990 __builtin_ia32_vcvtph2uw512_mask_round ((C), (__v32hi)(A), (B), (D)))
3991
3992 #define _mm512_maskz_cvt_roundph_epu16(A, B, C) \
3993 ((__m512i) \
3994 __builtin_ia32_vcvtph2uw512_mask_round ((B), \
3995 (__v32hi) \
3996 _mm512_setzero_si512 (), \
3997 (A), \
3998 (C)))
3999
4000 #endif /* __OPTIMIZE__ */
4001
4002 /* Intrinsics vcvttph2w. */
4003 extern __inline __m512i
4004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttph_epi16(__m512h __A)4005 _mm512_cvttph_epi16 (__m512h __A)
4006 {
4007 return (__m512i)
4008 __builtin_ia32_vcvttph2w512_mask_round (__A,
4009 (__v32hi)
4010 _mm512_setzero_si512 (),
4011 (__mmask32) -1,
4012 _MM_FROUND_CUR_DIRECTION);
4013 }
4014
4015 extern __inline __m512i
4016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttph_epi16(__m512i __A,__mmask32 __B,__m512h __C)4017 _mm512_mask_cvttph_epi16 (__m512i __A, __mmask32 __B, __m512h __C)
4018 {
4019 return (__m512i)
4020 __builtin_ia32_vcvttph2w512_mask_round (__C,
4021 (__v32hi) __A,
4022 __B,
4023 _MM_FROUND_CUR_DIRECTION);
4024 }
4025
4026 extern __inline __m512i
4027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttph_epi16(__mmask32 __A,__m512h __B)4028 _mm512_maskz_cvttph_epi16 (__mmask32 __A, __m512h __B)
4029 {
4030 return (__m512i)
4031 __builtin_ia32_vcvttph2w512_mask_round (__B,
4032 (__v32hi)
4033 _mm512_setzero_si512 (),
4034 __A,
4035 _MM_FROUND_CUR_DIRECTION);
4036 }
4037
4038 #ifdef __OPTIMIZE__
4039 extern __inline __m512i
4040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundph_epi16(__m512h __A,int __B)4041 _mm512_cvtt_roundph_epi16 (__m512h __A, int __B)
4042 {
4043 return (__m512i)
4044 __builtin_ia32_vcvttph2w512_mask_round (__A,
4045 (__v32hi)
4046 _mm512_setzero_si512 (),
4047 (__mmask32) -1,
4048 __B);
4049 }
4050
4051 extern __inline __m512i
4052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundph_epi16(__m512i __A,__mmask32 __B,__m512h __C,int __D)4053 _mm512_mask_cvtt_roundph_epi16 (__m512i __A, __mmask32 __B,
4054 __m512h __C, int __D)
4055 {
4056 return (__m512i)
4057 __builtin_ia32_vcvttph2w512_mask_round (__C,
4058 (__v32hi) __A,
4059 __B,
4060 __D);
4061 }
4062
4063 extern __inline __m512i
4064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundph_epi16(__mmask32 __A,__m512h __B,int __C)4065 _mm512_maskz_cvtt_roundph_epi16 (__mmask32 __A, __m512h __B, int __C)
4066 {
4067 return (__m512i)
4068 __builtin_ia32_vcvttph2w512_mask_round (__B,
4069 (__v32hi)
4070 _mm512_setzero_si512 (),
4071 __A,
4072 __C);
4073 }
4074
4075 #else
4076 #define _mm512_cvtt_roundph_epi16(A, B) \
4077 ((__m512i) \
4078 __builtin_ia32_vcvttph2w512_mask_round ((A), \
4079 (__v32hi) \
4080 _mm512_setzero_si512 (), \
4081 (__mmask32)-1, \
4082 (B)))
4083
4084 #define _mm512_mask_cvtt_roundph_epi16(A, B, C, D) \
4085 ((__m512i) \
4086 __builtin_ia32_vcvttph2w512_mask_round ((C), \
4087 (__v32hi)(A), \
4088 (B), \
4089 (D)))
4090
4091 #define _mm512_maskz_cvtt_roundph_epi16(A, B, C) \
4092 ((__m512i) \
4093 __builtin_ia32_vcvttph2w512_mask_round ((B), \
4094 (__v32hi) \
4095 _mm512_setzero_si512 (), \
4096 (A), \
4097 (C)))
4098
4099 #endif /* __OPTIMIZE__ */
4100
4101 /* Intrinsics vcvttph2uw. */
4102 extern __inline __m512i
4103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvttph_epu16(__m512h __A)4104 _mm512_cvttph_epu16 (__m512h __A)
4105 {
4106 return (__m512i)
4107 __builtin_ia32_vcvttph2uw512_mask_round (__A,
4108 (__v32hi)
4109 _mm512_setzero_si512 (),
4110 (__mmask32) -1,
4111 _MM_FROUND_CUR_DIRECTION);
4112 }
4113
4114 extern __inline __m512i
4115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvttph_epu16(__m512i __A,__mmask32 __B,__m512h __C)4116 _mm512_mask_cvttph_epu16 (__m512i __A, __mmask32 __B, __m512h __C)
4117 {
4118 return (__m512i)
4119 __builtin_ia32_vcvttph2uw512_mask_round (__C,
4120 (__v32hi) __A,
4121 __B,
4122 _MM_FROUND_CUR_DIRECTION);
4123 }
4124
4125 extern __inline __m512i
4126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvttph_epu16(__mmask32 __A,__m512h __B)4127 _mm512_maskz_cvttph_epu16 (__mmask32 __A, __m512h __B)
4128 {
4129 return (__m512i)
4130 __builtin_ia32_vcvttph2uw512_mask_round (__B,
4131 (__v32hi)
4132 _mm512_setzero_si512 (),
4133 __A,
4134 _MM_FROUND_CUR_DIRECTION);
4135 }
4136
4137 #ifdef __OPTIMIZE__
4138 extern __inline __m512i
4139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundph_epu16(__m512h __A,int __B)4140 _mm512_cvtt_roundph_epu16 (__m512h __A, int __B)
4141 {
4142 return (__m512i)
4143 __builtin_ia32_vcvttph2uw512_mask_round (__A,
4144 (__v32hi)
4145 _mm512_setzero_si512 (),
4146 (__mmask32) -1,
4147 __B);
4148 }
4149
4150 extern __inline __m512i
4151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundph_epu16(__m512i __A,__mmask32 __B,__m512h __C,int __D)4152 _mm512_mask_cvtt_roundph_epu16 (__m512i __A, __mmask32 __B,
4153 __m512h __C, int __D)
4154 {
4155 return (__m512i)
4156 __builtin_ia32_vcvttph2uw512_mask_round (__C,
4157 (__v32hi) __A,
4158 __B,
4159 __D);
4160 }
4161
4162 extern __inline __m512i
4163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundph_epu16(__mmask32 __A,__m512h __B,int __C)4164 _mm512_maskz_cvtt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
4165 {
4166 return (__m512i)
4167 __builtin_ia32_vcvttph2uw512_mask_round (__B,
4168 (__v32hi)
4169 _mm512_setzero_si512 (),
4170 __A,
4171 __C);
4172 }
4173
4174 #else
4175 #define _mm512_cvtt_roundph_epu16(A, B) \
4176 ((__m512i) \
4177 __builtin_ia32_vcvttph2uw512_mask_round ((A), \
4178 (__v32hi) \
4179 _mm512_setzero_si512 (), \
4180 (__mmask32)-1, \
4181 (B)))
4182
4183 #define _mm512_mask_cvtt_roundph_epu16(A, B, C, D) \
4184 ((__m512i) \
4185 __builtin_ia32_vcvttph2uw512_mask_round ((C), \
4186 (__v32hi)(A), \
4187 (B), \
4188 (D)))
4189
4190 #define _mm512_maskz_cvtt_roundph_epu16(A, B, C) \
4191 ((__m512i) \
4192 __builtin_ia32_vcvttph2uw512_mask_round ((B), \
4193 (__v32hi) \
4194 _mm512_setzero_si512 (), \
4195 (A), \
4196 (C)))
4197
4198 #endif /* __OPTIMIZE__ */
4199
4200 /* Intrinsics vcvtw2ph. */
4201 extern __inline __m512h
4202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_ph(__m512i __A)4203 _mm512_cvtepi16_ph (__m512i __A)
4204 {
4205 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
4206 _mm512_setzero_ph (),
4207 (__mmask32) -1,
4208 _MM_FROUND_CUR_DIRECTION);
4209 }
4210
4211 extern __inline __m512h
4212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_ph(__m512h __A,__mmask32 __B,__m512i __C)4213 _mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
4214 {
4215 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
4216 __A,
4217 __B,
4218 _MM_FROUND_CUR_DIRECTION);
4219 }
4220
4221 extern __inline __m512h
4222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_ph(__mmask32 __A,__m512i __B)4223 _mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
4224 {
4225 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
4226 _mm512_setzero_ph (),
4227 __A,
4228 _MM_FROUND_CUR_DIRECTION);
4229 }
4230
4231 #ifdef __OPTIMIZE__
4232 extern __inline __m512h
4233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi16_ph(__m512i __A,int __B)4234 _mm512_cvt_roundepi16_ph (__m512i __A, int __B)
4235 {
4236 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __A,
4237 _mm512_setzero_ph (),
4238 (__mmask32) -1,
4239 __B);
4240 }
4241
4242 extern __inline __m512h
4243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi16_ph(__m512h __A,__mmask32 __B,__m512i __C,int __D)4244 _mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
4245 {
4246 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __C,
4247 __A,
4248 __B,
4249 __D);
4250 }
4251
4252 extern __inline __m512h
4253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi16_ph(__mmask32 __A,__m512i __B,int __C)4254 _mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
4255 {
4256 return __builtin_ia32_vcvtw2ph512_mask_round ((__v32hi) __B,
4257 _mm512_setzero_ph (),
4258 __A,
4259 __C);
4260 }
4261
4262 #else
4263 #define _mm512_cvt_roundepi16_ph(A, B) \
4264 (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(A), \
4265 _mm512_setzero_ph (), \
4266 (__mmask32)-1, \
4267 (B)))
4268
4269 #define _mm512_mask_cvt_roundepi16_ph(A, B, C, D) \
4270 (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(C), \
4271 (A), \
4272 (B), \
4273 (D)))
4274
4275 #define _mm512_maskz_cvt_roundepi16_ph(A, B, C) \
4276 (__builtin_ia32_vcvtw2ph512_mask_round ((__v32hi)(B), \
4277 _mm512_setzero_ph (), \
4278 (A), \
4279 (C)))
4280
4281 #endif /* __OPTIMIZE__ */
4282
4283 /* Intrinsics vcvtuw2ph. */
4284 extern __inline __m512h
4285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_ph(__m512i __A)4286 _mm512_cvtepu16_ph (__m512i __A)
4287 {
4288 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
4289 _mm512_setzero_ph (),
4290 (__mmask32) -1,
4291 _MM_FROUND_CUR_DIRECTION);
4292 }
4293
4294 extern __inline __m512h
4295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_ph(__m512h __A,__mmask32 __B,__m512i __C)4296 _mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
4297 {
4298 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
4299 __A,
4300 __B,
4301 _MM_FROUND_CUR_DIRECTION);
4302 }
4303
4304 extern __inline __m512h
4305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_ph(__mmask32 __A,__m512i __B)4306 _mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
4307 {
4308 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
4309 _mm512_setzero_ph (),
4310 __A,
4311 _MM_FROUND_CUR_DIRECTION);
4312 }
4313
4314 #ifdef __OPTIMIZE__
4315 extern __inline __m512h
4316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu16_ph(__m512i __A,int __B)4317 _mm512_cvt_roundepu16_ph (__m512i __A, int __B)
4318 {
4319 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __A,
4320 _mm512_setzero_ph (),
4321 (__mmask32) -1,
4322 __B);
4323 }
4324
4325 extern __inline __m512h
4326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu16_ph(__m512h __A,__mmask32 __B,__m512i __C,int __D)4327 _mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
4328 {
4329 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __C,
4330 __A,
4331 __B,
4332 __D);
4333 }
4334
4335 extern __inline __m512h
4336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu16_ph(__mmask32 __A,__m512i __B,int __C)4337 _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
4338 {
4339 return __builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi) __B,
4340 _mm512_setzero_ph (),
4341 __A,
4342 __C);
4343 }
4344
4345 #else
4346 #define _mm512_cvt_roundepu16_ph(A, B) \
4347 (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(A), \
4348 _mm512_setzero_ph (), \
4349 (__mmask32)-1, \
4350 (B)))
4351
4352 #define _mm512_mask_cvt_roundepu16_ph(A, B, C, D) \
4353 (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(C), \
4354 (A), \
4355 (B), \
4356 (D)))
4357
4358 #define _mm512_maskz_cvt_roundepu16_ph(A, B, C) \
4359 (__builtin_ia32_vcvtuw2ph512_mask_round ((__v32hi)(B), \
4360 _mm512_setzero_ph (), \
4361 (A), \
4362 (C)))
4363
4364 #endif /* __OPTIMIZE__ */
4365
4366 /* Intrinsics vcvtsh2si, vcvtsh2us. */
4367 extern __inline int
4368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_i32(__m128h __A)4369 _mm_cvtsh_i32 (__m128h __A)
4370 {
4371 return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
4372 }
4373
4374 extern __inline unsigned
4375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_u32(__m128h __A)4376 _mm_cvtsh_u32 (__m128h __A)
4377 {
4378 return (int) __builtin_ia32_vcvtsh2usi32_round (__A,
4379 _MM_FROUND_CUR_DIRECTION);
4380 }
4381
4382 #ifdef __OPTIMIZE__
4383 extern __inline int
4384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsh_i32(__m128h __A,const int __R)4385 _mm_cvt_roundsh_i32 (__m128h __A, const int __R)
4386 {
4387 return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
4388 }
4389
4390 extern __inline unsigned
4391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsh_u32(__m128h __A,const int __R)4392 _mm_cvt_roundsh_u32 (__m128h __A, const int __R)
4393 {
4394 return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
4395 }
4396
4397 #else
4398 #define _mm_cvt_roundsh_i32(A, B) \
4399 ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
4400 #define _mm_cvt_roundsh_u32(A, B) \
4401 ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
4402
4403 #endif /* __OPTIMIZE__ */
4404
4405 #ifdef __x86_64__
4406 extern __inline long long
4407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_i64(__m128h __A)4408 _mm_cvtsh_i64 (__m128h __A)
4409 {
4410 return (long long)
4411 __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
4412 }
4413
4414 extern __inline unsigned long long
4415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_u64(__m128h __A)4416 _mm_cvtsh_u64 (__m128h __A)
4417 {
4418 return (long long)
4419 __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
4420 }
4421
4422 #ifdef __OPTIMIZE__
4423 extern __inline long long
4424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsh_i64(__m128h __A,const int __R)4425 _mm_cvt_roundsh_i64 (__m128h __A, const int __R)
4426 {
4427 return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
4428 }
4429
4430 extern __inline unsigned long long
4431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsh_u64(__m128h __A,const int __R)4432 _mm_cvt_roundsh_u64 (__m128h __A, const int __R)
4433 {
4434 return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
4435 }
4436
4437 #else
4438 #define _mm_cvt_roundsh_i64(A, B) \
4439 ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
4440 #define _mm_cvt_roundsh_u64(A, B) \
4441 ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
4442
4443 #endif /* __OPTIMIZE__ */
4444 #endif /* __x86_64__ */
4445
4446 /* Intrinsics vcvttsh2si, vcvttsh2us. */
4447 extern __inline int
4448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsh_i32(__m128h __A)4449 _mm_cvttsh_i32 (__m128h __A)
4450 {
4451 return (int)
4452 __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
4453 }
4454
4455 extern __inline unsigned
4456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsh_u32(__m128h __A)4457 _mm_cvttsh_u32 (__m128h __A)
4458 {
4459 return (int)
4460 __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
4461 }
4462
4463 #ifdef __OPTIMIZE__
4464 extern __inline int
4465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsh_i32(__m128h __A,const int __R)4466 _mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
4467 {
4468 return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
4469 }
4470
4471 extern __inline unsigned
4472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsh_u32(__m128h __A,const int __R)4473 _mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
4474 {
4475 return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
4476 }
4477
4478 #else
4479 #define _mm_cvtt_roundsh_i32(A, B) \
4480 ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
4481 #define _mm_cvtt_roundsh_u32(A, B) \
4482 ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
4483
4484 #endif /* __OPTIMIZE__ */
4485
4486 #ifdef __x86_64__
4487 extern __inline long long
4488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsh_i64(__m128h __A)4489 _mm_cvttsh_i64 (__m128h __A)
4490 {
4491 return (long long)
4492 __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
4493 }
4494
4495 extern __inline unsigned long long
4496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsh_u64(__m128h __A)4497 _mm_cvttsh_u64 (__m128h __A)
4498 {
4499 return (long long)
4500 __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
4501 }
4502
4503 #ifdef __OPTIMIZE__
4504 extern __inline long long
4505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsh_i64(__m128h __A,const int __R)4506 _mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
4507 {
4508 return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
4509 }
4510
4511 extern __inline unsigned long long
4512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtt_roundsh_u64(__m128h __A,const int __R)4513 _mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
4514 {
4515 return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
4516 }
4517
4518 #else
4519 #define _mm_cvtt_roundsh_i64(A, B) \
4520 ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
4521 #define _mm_cvtt_roundsh_u64(A, B) \
4522 ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
4523
4524 #endif /* __OPTIMIZE__ */
4525 #endif /* __x86_64__ */
4526
4527 /* Intrinsics vcvtsi2sh, vcvtusi2sh. */
4528 extern __inline __m128h
4529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvti32_sh(__m128h __A,int __B)4530 _mm_cvti32_sh (__m128h __A, int __B)
4531 {
4532 return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4533 }
4534
4535 extern __inline __m128h
4536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtu32_sh(__m128h __A,unsigned int __B)4537 _mm_cvtu32_sh (__m128h __A, unsigned int __B)
4538 {
4539 return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4540 }
4541
4542 #ifdef __OPTIMIZE__
4543 extern __inline __m128h
4544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi32_sh(__m128h __A,int __B,const int __R)4545 _mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
4546 {
4547 return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
4548 }
4549
4550 extern __inline __m128h
4551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu32_sh(__m128h __A,unsigned int __B,const int __R)4552 _mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
4553 {
4554 return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
4555 }
4556
4557 #else
4558 #define _mm_cvt_roundi32_sh(A, B, C) \
4559 (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
4560 #define _mm_cvt_roundu32_sh(A, B, C) \
4561 (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
4562
4563 #endif /* __OPTIMIZE__ */
4564
4565 #ifdef __x86_64__
4566 extern __inline __m128h
4567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvti64_sh(__m128h __A,long long __B)4568 _mm_cvti64_sh (__m128h __A, long long __B)
4569 {
4570 return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4571 }
4572
4573 extern __inline __m128h
4574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtu64_sh(__m128h __A,unsigned long long __B)4575 _mm_cvtu64_sh (__m128h __A, unsigned long long __B)
4576 {
4577 return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
4578 }
4579
4580 #ifdef __OPTIMIZE__
4581 extern __inline __m128h
4582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_sh(__m128h __A,long long __B,const int __R)4583 _mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
4584 {
4585 return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
4586 }
4587
4588 extern __inline __m128h
4589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_sh(__m128h __A,unsigned long long __B,const int __R)4590 _mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
4591 {
4592 return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
4593 }
4594
4595 #else
4596 #define _mm_cvt_roundi64_sh(A, B, C) \
4597 (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
4598 #define _mm_cvt_roundu64_sh(A, B, C) \
4599 (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
4600
4601 #endif /* __OPTIMIZE__ */
4602 #endif /* __x86_64__ */
4603
4604 /* Intrinsics vcvtph2pd. */
4605 extern __inline __m512d
4606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtph_pd(__m128h __A)4607 _mm512_cvtph_pd (__m128h __A)
4608 {
4609 return __builtin_ia32_vcvtph2pd512_mask_round (__A,
4610 _mm512_setzero_pd (),
4611 (__mmask8) -1,
4612 _MM_FROUND_CUR_DIRECTION);
4613 }
4614
4615 extern __inline __m512d
4616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtph_pd(__m512d __A,__mmask8 __B,__m128h __C)4617 _mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C)
4618 {
4619 return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B,
4620 _MM_FROUND_CUR_DIRECTION);
4621 }
4622
4623 extern __inline __m512d
4624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtph_pd(__mmask8 __A,__m128h __B)4625 _mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
4626 {
4627 return __builtin_ia32_vcvtph2pd512_mask_round (__B,
4628 _mm512_setzero_pd (),
4629 __A,
4630 _MM_FROUND_CUR_DIRECTION);
4631 }
4632
4633 #ifdef __OPTIMIZE__
4634 extern __inline __m512d
4635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_pd(__m128h __A,int __B)4636 _mm512_cvt_roundph_pd (__m128h __A, int __B)
4637 {
4638 return __builtin_ia32_vcvtph2pd512_mask_round (__A,
4639 _mm512_setzero_pd (),
4640 (__mmask8) -1,
4641 __B);
4642 }
4643
4644 extern __inline __m512d
4645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_pd(__m512d __A,__mmask8 __B,__m128h __C,int __D)4646 _mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D)
4647 {
4648 return __builtin_ia32_vcvtph2pd512_mask_round (__C, __A, __B, __D);
4649 }
4650
4651 extern __inline __m512d
4652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_pd(__mmask8 __A,__m128h __B,int __C)4653 _mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C)
4654 {
4655 return __builtin_ia32_vcvtph2pd512_mask_round (__B,
4656 _mm512_setzero_pd (),
4657 __A,
4658 __C);
4659 }
4660
4661 #else
4662 #define _mm512_cvt_roundph_pd(A, B) \
4663 (__builtin_ia32_vcvtph2pd512_mask_round ((A), \
4664 _mm512_setzero_pd (), \
4665 (__mmask8)-1, \
4666 (B)))
4667
4668 #define _mm512_mask_cvt_roundph_pd(A, B, C, D) \
4669 (__builtin_ia32_vcvtph2pd512_mask_round ((C), (A), (B), (D)))
4670
4671 #define _mm512_maskz_cvt_roundph_pd(A, B, C) \
4672 (__builtin_ia32_vcvtph2pd512_mask_round ((B), \
4673 _mm512_setzero_pd (), \
4674 (A), \
4675 (C)))
4676
4677 #endif /* __OPTIMIZE__ */
4678
4679 /* Intrinsics vcvtph2psx. */
4680 extern __inline __m512
4681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtxph_ps(__m256h __A)4682 _mm512_cvtxph_ps (__m256h __A)
4683 {
4684 return __builtin_ia32_vcvtph2psx512_mask_round (__A,
4685 _mm512_setzero_ps (),
4686 (__mmask16) -1,
4687 _MM_FROUND_CUR_DIRECTION);
4688 }
4689
4690 extern __inline __m512
4691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtxph_ps(__m512 __A,__mmask16 __B,__m256h __C)4692 _mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C)
4693 {
4694 return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B,
4695 _MM_FROUND_CUR_DIRECTION);
4696 }
4697
4698 extern __inline __m512
4699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtxph_ps(__mmask16 __A,__m256h __B)4700 _mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B)
4701 {
4702 return __builtin_ia32_vcvtph2psx512_mask_round (__B,
4703 _mm512_setzero_ps (),
4704 __A,
4705 _MM_FROUND_CUR_DIRECTION);
4706 }
4707
4708 #ifdef __OPTIMIZE__
4709 extern __inline __m512
4710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtx_roundph_ps(__m256h __A,int __B)4711 _mm512_cvtx_roundph_ps (__m256h __A, int __B)
4712 {
4713 return __builtin_ia32_vcvtph2psx512_mask_round (__A,
4714 _mm512_setzero_ps (),
4715 (__mmask16) -1,
4716 __B);
4717 }
4718
4719 extern __inline __m512
4720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtx_roundph_ps(__m512 __A,__mmask16 __B,__m256h __C,int __D)4721 _mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D)
4722 {
4723 return __builtin_ia32_vcvtph2psx512_mask_round (__C, __A, __B, __D);
4724 }
4725
4726 extern __inline __m512
4727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtx_roundph_ps(__mmask16 __A,__m256h __B,int __C)4728 _mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C)
4729 {
4730 return __builtin_ia32_vcvtph2psx512_mask_round (__B,
4731 _mm512_setzero_ps (),
4732 __A,
4733 __C);
4734 }
4735
4736 #else
4737 #define _mm512_cvtx_roundph_ps(A, B) \
4738 (__builtin_ia32_vcvtph2psx512_mask_round ((A), \
4739 _mm512_setzero_ps (), \
4740 (__mmask16)-1, \
4741 (B)))
4742
4743 #define _mm512_mask_cvtx_roundph_ps(A, B, C, D) \
4744 (__builtin_ia32_vcvtph2psx512_mask_round ((C), (A), (B), (D)))
4745
4746 #define _mm512_maskz_cvtx_roundph_ps(A, B, C) \
4747 (__builtin_ia32_vcvtph2psx512_mask_round ((B), \
4748 _mm512_setzero_ps (), \
4749 (A), \
4750 (C)))
4751 #endif /* __OPTIMIZE__ */
4752
4753 /* Intrinsics vcvtps2ph. */
4754 extern __inline __m256h
4755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtxps_ph(__m512 __A)4756 _mm512_cvtxps_ph (__m512 __A)
4757 {
4758 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
4759 _mm256_setzero_ph (),
4760 (__mmask16) -1,
4761 _MM_FROUND_CUR_DIRECTION);
4762 }
4763
4764 extern __inline __m256h
4765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtxps_ph(__m256h __A,__mmask16 __B,__m512 __C)4766 _mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C)
4767 {
4768 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
4769 __A, __B,
4770 _MM_FROUND_CUR_DIRECTION);
4771 }
4772
4773 extern __inline __m256h
4774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtxps_ph(__mmask16 __A,__m512 __B)4775 _mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B)
4776 {
4777 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
4778 _mm256_setzero_ph (),
4779 __A,
4780 _MM_FROUND_CUR_DIRECTION);
4781 }
4782
4783 #ifdef __OPTIMIZE__
4784 extern __inline __m256h
4785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtx_roundps_ph(__m512 __A,int __B)4786 _mm512_cvtx_roundps_ph (__m512 __A, int __B)
4787 {
4788 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __A,
4789 _mm256_setzero_ph (),
4790 (__mmask16) -1,
4791 __B);
4792 }
4793
4794 extern __inline __m256h
4795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtx_roundps_ph(__m256h __A,__mmask16 __B,__m512 __C,int __D)4796 _mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D)
4797 {
4798 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __C,
4799 __A, __B, __D);
4800 }
4801
4802 extern __inline __m256h
4803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtx_roundps_ph(__mmask16 __A,__m512 __B,int __C)4804 _mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C)
4805 {
4806 return __builtin_ia32_vcvtps2phx512_mask_round ((__v16sf) __B,
4807 _mm256_setzero_ph (),
4808 __A, __C);
4809 }
4810
4811 #else
4812 #define _mm512_cvtx_roundps_ph(A, B) \
4813 (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(A), \
4814 _mm256_setzero_ph (),\
4815 (__mmask16)-1, (B)))
4816
4817 #define _mm512_mask_cvtx_roundps_ph(A, B, C, D) \
4818 (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(C), \
4819 (A), (B), (D)))
4820
4821 #define _mm512_maskz_cvtx_roundps_ph(A, B, C) \
4822 (__builtin_ia32_vcvtps2phx512_mask_round ((__v16sf)(B), \
4823 _mm256_setzero_ph (),\
4824 (A), (C)))
4825 #endif /* __OPTIMIZE__ */
4826
4827 /* Intrinsics vcvtpd2ph. */
4828 extern __inline __m128h
4829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtpd_ph(__m512d __A)4830 _mm512_cvtpd_ph (__m512d __A)
4831 {
4832 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
4833 _mm_setzero_ph (),
4834 (__mmask8) -1,
4835 _MM_FROUND_CUR_DIRECTION);
4836 }
4837
4838 extern __inline __m128h
4839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtpd_ph(__m128h __A,__mmask8 __B,__m512d __C)4840 _mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C)
4841 {
4842 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
4843 __A, __B,
4844 _MM_FROUND_CUR_DIRECTION);
4845 }
4846
4847 extern __inline __m128h
4848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtpd_ph(__mmask8 __A,__m512d __B)4849 _mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B)
4850 {
4851 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
4852 _mm_setzero_ph (),
4853 __A,
4854 _MM_FROUND_CUR_DIRECTION);
4855 }
4856
4857 #ifdef __OPTIMIZE__
4858 extern __inline __m128h
4859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_ph(__m512d __A,int __B)4860 _mm512_cvt_roundpd_ph (__m512d __A, int __B)
4861 {
4862 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __A,
4863 _mm_setzero_ph (),
4864 (__mmask8) -1,
4865 __B);
4866 }
4867
4868 extern __inline __m128h
4869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_ph(__m128h __A,__mmask8 __B,__m512d __C,int __D)4870 _mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D)
4871 {
4872 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __C,
4873 __A, __B, __D);
4874 }
4875
4876 extern __inline __m128h
4877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_ph(__mmask8 __A,__m512d __B,int __C)4878 _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
4879 {
4880 return __builtin_ia32_vcvtpd2ph512_mask_round ((__v8df) __B,
4881 _mm_setzero_ph (),
4882 __A, __C);
4883 }
4884
4885 #else
4886 #define _mm512_cvt_roundpd_ph(A, B) \
4887 (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(A), \
4888 _mm_setzero_ph (), \
4889 (__mmask8)-1, (B)))
4890
4891 #define _mm512_mask_cvt_roundpd_ph(A, B, C, D) \
4892 (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(C), \
4893 (A), (B), (D)))
4894
4895 #define _mm512_maskz_cvt_roundpd_ph(A, B, C) \
4896 (__builtin_ia32_vcvtpd2ph512_mask_round ((__v8df)(B), \
4897 _mm_setzero_ph (), \
4898 (A), (C)))
4899
4900 #endif /* __OPTIMIZE__ */
4901
4902 /* Intrinsics vcvtsh2ss, vcvtsh2sd. */
4903 extern __inline __m128
4904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_ss(__m128 __A,__m128h __B)4905 _mm_cvtsh_ss (__m128 __A, __m128h __B)
4906 {
4907 return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
4908 _mm_setzero_ps (),
4909 (__mmask8) -1,
4910 _MM_FROUND_CUR_DIRECTION);
4911 }
4912
4913 extern __inline __m128
4914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsh_ss(__m128 __A,__mmask8 __B,__m128 __C,__m128h __D)4915 _mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
4916 __m128h __D)
4917 {
4918 return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
4919 _MM_FROUND_CUR_DIRECTION);
4920 }
4921
4922 extern __inline __m128
4923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsh_ss(__mmask8 __A,__m128 __B,__m128h __C)4924 _mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
4925 __m128h __C)
4926 {
4927 return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
4928 _mm_setzero_ps (),
4929 __A, _MM_FROUND_CUR_DIRECTION);
4930 }
4931
4932 extern __inline __m128d
4933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsh_sd(__m128d __A,__m128h __B)4934 _mm_cvtsh_sd (__m128d __A, __m128h __B)
4935 {
4936 return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
4937 _mm_setzero_pd (),
4938 (__mmask8) -1,
4939 _MM_FROUND_CUR_DIRECTION);
4940 }
4941
4942 extern __inline __m128d
4943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsh_sd(__m128d __A,__mmask8 __B,__m128d __C,__m128h __D)4944 _mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
4945 __m128h __D)
4946 {
4947 return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
4948 _MM_FROUND_CUR_DIRECTION);
4949 }
4950
4951 extern __inline __m128d
4952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsh_sd(__mmask8 __A,__m128d __B,__m128h __C)4953 _mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
4954 {
4955 return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
4956 _mm_setzero_pd (),
4957 __A, _MM_FROUND_CUR_DIRECTION);
4958 }
4959
4960 #ifdef __OPTIMIZE__
4961 extern __inline __m128
4962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsh_ss(__m128 __A,__m128h __B,const int __R)4963 _mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
4964 {
4965 return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
4966 _mm_setzero_ps (),
4967 (__mmask8) -1, __R);
4968 }
4969
4970 extern __inline __m128
4971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvt_roundsh_ss(__m128 __A,__mmask8 __B,__m128 __C,__m128h __D,const int __R)4972 _mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
4973 __m128h __D, const int __R)
4974 {
4975 return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
4976 }
4977
4978 extern __inline __m128
4979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvt_roundsh_ss(__mmask8 __A,__m128 __B,__m128h __C,const int __R)4980 _mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
4981 __m128h __C, const int __R)
4982 {
4983 return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
4984 _mm_setzero_ps (),
4985 __A, __R);
4986 }
4987
4988 extern __inline __m128d
4989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsh_sd(__m128d __A,__m128h __B,const int __R)4990 _mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
4991 {
4992 return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
4993 _mm_setzero_pd (),
4994 (__mmask8) -1, __R);
4995 }
4996
4997 extern __inline __m128d
4998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvt_roundsh_sd(__m128d __A,__mmask8 __B,__m128d __C,__m128h __D,const int __R)4999 _mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
5000 __m128h __D, const int __R)
5001 {
5002 return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
5003 }
5004
5005 extern __inline __m128d
5006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvt_roundsh_sd(__mmask8 __A,__m128d __B,__m128h __C,const int __R)5007 _mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
5008 {
5009 return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
5010 _mm_setzero_pd (),
5011 __A, __R);
5012 }
5013
5014 #else
5015 #define _mm_cvt_roundsh_ss(A, B, R) \
5016 (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \
5017 _mm_setzero_ps (), \
5018 (__mmask8) -1, (R)))
5019
5020 #define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \
5021 (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
5022
5023 #define _mm_maskz_cvt_roundsh_ss(A, B, C, R) \
5024 (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \
5025 _mm_setzero_ps (), \
5026 (A), (R)))
5027
5028 #define _mm_cvt_roundsh_sd(A, B, R) \
5029 (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \
5030 _mm_setzero_pd (), \
5031 (__mmask8) -1, (R)))
5032
5033 #define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \
5034 (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
5035
5036 #define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \
5037 (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \
5038 _mm_setzero_pd (), \
5039 (A), (R)))
5040
5041 #endif /* __OPTIMIZE__ */
5042
5043 /* Intrinsics vcvtss2sh, vcvtsd2sh. */
5044 extern __inline __m128h
5045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_sh(__m128h __A,__m128 __B)5046 _mm_cvtss_sh (__m128h __A, __m128 __B)
5047 {
5048 return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
5049 _mm_setzero_ph (),
5050 (__mmask8) -1,
5051 _MM_FROUND_CUR_DIRECTION);
5052 }
5053
5054 extern __inline __m128h
5055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtss_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128 __D)5056 _mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
5057 {
5058 return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
5059 _MM_FROUND_CUR_DIRECTION);
5060 }
5061
5062 extern __inline __m128h
5063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtss_sh(__mmask8 __A,__m128h __B,__m128 __C)5064 _mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
5065 {
5066 return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
5067 _mm_setzero_ph (),
5068 __A, _MM_FROUND_CUR_DIRECTION);
5069 }
5070
5071 extern __inline __m128h
5072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_sh(__m128h __A,__m128d __B)5073 _mm_cvtsd_sh (__m128h __A, __m128d __B)
5074 {
5075 return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
5076 _mm_setzero_ph (),
5077 (__mmask8) -1,
5078 _MM_FROUND_CUR_DIRECTION);
5079 }
5080
5081 extern __inline __m128h
5082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsd_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128d __D)5083 _mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
5084 {
5085 return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
5086 _MM_FROUND_CUR_DIRECTION);
5087 }
5088
5089 extern __inline __m128h
5090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtsd_sh(__mmask8 __A,__m128h __B,__m128d __C)5091 _mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
5092 {
5093 return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
5094 _mm_setzero_ph (),
5095 __A, _MM_FROUND_CUR_DIRECTION);
5096 }
5097
5098 #ifdef __OPTIMIZE__
5099 extern __inline __m128h
5100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundss_sh(__m128h __A,__m128 __B,const int __R)5101 _mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
5102 {
5103 return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
5104 _mm_setzero_ph (),
5105 (__mmask8) -1, __R);
5106 }
5107
5108 extern __inline __m128h
5109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvt_roundss_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128 __D,const int __R)5110 _mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
5111 const int __R)
5112 {
5113 return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
5114 }
5115
5116 extern __inline __m128h
5117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvt_roundss_sh(__mmask8 __A,__m128h __B,__m128 __C,const int __R)5118 _mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
5119 const int __R)
5120 {
5121 return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
5122 _mm_setzero_ph (),
5123 __A, __R);
5124 }
5125
5126 extern __inline __m128h
5127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsd_sh(__m128h __A,__m128d __B,const int __R)5128 _mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
5129 {
5130 return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
5131 _mm_setzero_ph (),
5132 (__mmask8) -1, __R);
5133 }
5134
5135 extern __inline __m128h
5136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvt_roundsd_sh(__m128h __A,__mmask8 __B,__m128h __C,__m128d __D,const int __R)5137 _mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
5138 const int __R)
5139 {
5140 return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
5141 }
5142
5143 extern __inline __m128h
5144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvt_roundsd_sh(__mmask8 __A,__m128h __B,__m128d __C,const int __R)5145 _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
5146 const int __R)
5147 {
5148 return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
5149 _mm_setzero_ph (),
5150 __A, __R);
5151 }
5152
5153 #else
5154 #define _mm_cvt_roundss_sh(A, B, R) \
5155 (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \
5156 _mm_setzero_ph (), \
5157 (__mmask8) -1, R))
5158
5159 #define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \
5160 (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
5161
5162 #define _mm_maskz_cvt_roundss_sh(A, B, C, R) \
5163 (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \
5164 _mm_setzero_ph (), \
5165 A, R))
5166
5167 #define _mm_cvt_roundsd_sh(A, B, R) \
5168 (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \
5169 _mm_setzero_ph (), \
5170 (__mmask8) -1, R))
5171
5172 #define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \
5173 (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
5174
5175 #define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \
5176 (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \
5177 _mm_setzero_ph (), \
5178 (A), (R)))
5179
5180 #endif /* __OPTIMIZE__ */
5181
5182 /* Intrinsics vfmaddsub[132,213,231]ph. */
5183 extern __inline __m512h
5184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_ph(__m512h __A,__m512h __B,__m512h __C)5185 _mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C)
5186 {
5187 return (__m512h)
5188 __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5189 (__v32hf) __B,
5190 (__v32hf) __C,
5191 (__mmask32) -1,
5192 _MM_FROUND_CUR_DIRECTION);
5193 }
5194
5195 extern __inline __m512h
5196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmaddsub_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C)5197 _mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5198 {
5199 return (__m512h)
5200 __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5201 (__v32hf) __B,
5202 (__v32hf) __C,
5203 (__mmask32) __U,
5204 _MM_FROUND_CUR_DIRECTION);
5205 }
5206
5207 extern __inline __m512h
5208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmaddsub_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U)5209 _mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5210 {
5211 return (__m512h)
5212 __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
5213 (__v32hf) __B,
5214 (__v32hf) __C,
5215 (__mmask32) __U,
5216 _MM_FROUND_CUR_DIRECTION);
5217 }
5218
5219 extern __inline __m512h
5220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmaddsub_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C)5221 _mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5222 {
5223 return (__m512h)
5224 __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
5225 (__v32hf) __B,
5226 (__v32hf) __C,
5227 (__mmask32) __U,
5228 _MM_FROUND_CUR_DIRECTION);
5229 }
5230
5231 #ifdef __OPTIMIZE__
5232 extern __inline __m512h
5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_round_ph(__m512h __A,__m512h __B,__m512h __C,const int __R)5234 _mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5235 {
5236 return (__m512h)
5237 __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5238 (__v32hf) __B,
5239 (__v32hf) __C,
5240 (__mmask32) -1, __R);
5241 }
5242
5243 extern __inline __m512h
5244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmaddsub_round_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C,const int __R)5245 _mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5246 __m512h __C, const int __R)
5247 {
5248 return (__m512h)
5249 __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A,
5250 (__v32hf) __B,
5251 (__v32hf) __C,
5252 (__mmask32) __U, __R);
5253 }
5254
5255 extern __inline __m512h
5256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmaddsub_round_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U,const int __R)5257 _mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
5258 __mmask32 __U, const int __R)
5259 {
5260 return (__m512h)
5261 __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A,
5262 (__v32hf) __B,
5263 (__v32hf) __C,
5264 (__mmask32) __U, __R);
5265 }
5266
5267 extern __inline __m512h
5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmaddsub_round_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C,const int __R)5269 _mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5270 __m512h __C, const int __R)
5271 {
5272 return (__m512h)
5273 __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A,
5274 (__v32hf) __B,
5275 (__v32hf) __C,
5276 (__mmask32) __U, __R);
5277 }
5278
5279 #else
5280 #define _mm512_fmaddsub_round_ph(A, B, C, R) \
5281 ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R)))
5282
5283 #define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
5284 ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R)))
5285
5286 #define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
5287 ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R)))
5288
5289 #define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
5290 ((__m512h)__builtin_ia32_vfmaddsubph512_maskz ((A), (B), (C), (U), (R)))
5291
5292 #endif /* __OPTIMIZE__ */
5293
5294 /* Intrinsics vfmsubadd[132,213,231]ph. */
5295 extern __inline __m512h
5296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsubadd_ph(__m512h __A,__m512h __B,__m512h __C)5297 _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C)
5298 {
5299 return (__m512h)
5300 __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5301 (__v32hf) __B,
5302 (__v32hf) __C,
5303 (__mmask32) -1,
5304 _MM_FROUND_CUR_DIRECTION);
5305 }
5306
5307 extern __inline __m512h
5308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsubadd_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C)5309 _mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U,
5310 __m512h __B, __m512h __C)
5311 {
5312 return (__m512h)
5313 __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5314 (__v32hf) __B,
5315 (__v32hf) __C,
5316 (__mmask32) __U,
5317 _MM_FROUND_CUR_DIRECTION);
5318 }
5319
5320 extern __inline __m512h
5321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsubadd_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U)5322 _mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B,
5323 __m512h __C, __mmask32 __U)
5324 {
5325 return (__m512h)
5326 __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
5327 (__v32hf) __B,
5328 (__v32hf) __C,
5329 (__mmask32) __U,
5330 _MM_FROUND_CUR_DIRECTION);
5331 }
5332
5333 extern __inline __m512h
5334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsubadd_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C)5335 _mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A,
5336 __m512h __B, __m512h __C)
5337 {
5338 return (__m512h)
5339 __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
5340 (__v32hf) __B,
5341 (__v32hf) __C,
5342 (__mmask32) __U,
5343 _MM_FROUND_CUR_DIRECTION);
5344 }
5345
5346 #ifdef __OPTIMIZE__
5347 extern __inline __m512h
5348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsubadd_round_ph(__m512h __A,__m512h __B,__m512h __C,const int __R)5349 _mm512_fmsubadd_round_ph (__m512h __A, __m512h __B,
5350 __m512h __C, const int __R)
5351 {
5352 return (__m512h)
5353 __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5354 (__v32hf) __B,
5355 (__v32hf) __C,
5356 (__mmask32) -1, __R);
5357 }
5358
5359 extern __inline __m512h
5360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsubadd_round_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C,const int __R)5361 _mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5362 __m512h __C, const int __R)
5363 {
5364 return (__m512h)
5365 __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A,
5366 (__v32hf) __B,
5367 (__v32hf) __C,
5368 (__mmask32) __U, __R);
5369 }
5370
5371 extern __inline __m512h
5372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsubadd_round_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U,const int __R)5373 _mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
5374 __mmask32 __U, const int __R)
5375 {
5376 return (__m512h)
5377 __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A,
5378 (__v32hf) __B,
5379 (__v32hf) __C,
5380 (__mmask32) __U, __R);
5381 }
5382
5383 extern __inline __m512h
5384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsubadd_round_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C,const int __R)5385 _mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5386 __m512h __C, const int __R)
5387 {
5388 return (__m512h)
5389 __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A,
5390 (__v32hf) __B,
5391 (__v32hf) __C,
5392 (__mmask32) __U, __R);
5393 }
5394
5395 #else
5396 #define _mm512_fmsubadd_round_ph(A, B, C, R) \
5397 ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R)))
5398
5399 #define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
5400 ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R)))
5401
5402 #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
5403 ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R)))
5404
5405 #define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
5406 ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R)))
5407
5408 #endif /* __OPTIMIZE__ */
5409
5410 /* Intrinsics vfmadd[132,213,231]ph. */
5411 extern __inline __m512h
5412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_ph(__m512h __A,__m512h __B,__m512h __C)5413 _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C)
5414 {
5415 return (__m512h)
5416 __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5417 (__v32hf) __B,
5418 (__v32hf) __C,
5419 (__mmask32) -1,
5420 _MM_FROUND_CUR_DIRECTION);
5421 }
5422
5423 extern __inline __m512h
5424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C)5425 _mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5426 {
5427 return (__m512h)
5428 __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5429 (__v32hf) __B,
5430 (__v32hf) __C,
5431 (__mmask32) __U,
5432 _MM_FROUND_CUR_DIRECTION);
5433 }
5434
5435 extern __inline __m512h
5436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U)5437 _mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5438 {
5439 return (__m512h)
5440 __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
5441 (__v32hf) __B,
5442 (__v32hf) __C,
5443 (__mmask32) __U,
5444 _MM_FROUND_CUR_DIRECTION);
5445 }
5446
5447 extern __inline __m512h
5448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C)5449 _mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5450 {
5451 return (__m512h)
5452 __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
5453 (__v32hf) __B,
5454 (__v32hf) __C,
5455 (__mmask32) __U,
5456 _MM_FROUND_CUR_DIRECTION);
5457 }
5458
5459 #ifdef __OPTIMIZE__
5460 extern __inline __m512h
5461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_round_ph(__m512h __A,__m512h __B,__m512h __C,const int __R)5462 _mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5463 {
5464 return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5465 (__v32hf) __B,
5466 (__v32hf) __C,
5467 (__mmask32) -1, __R);
5468 }
5469
5470 extern __inline __m512h
5471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_round_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C,const int __R)5472 _mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5473 __m512h __C, const int __R)
5474 {
5475 return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A,
5476 (__v32hf) __B,
5477 (__v32hf) __C,
5478 (__mmask32) __U, __R);
5479 }
5480
5481 extern __inline __m512h
5482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_round_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U,const int __R)5483 _mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
5484 __mmask32 __U, const int __R)
5485 {
5486 return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A,
5487 (__v32hf) __B,
5488 (__v32hf) __C,
5489 (__mmask32) __U, __R);
5490 }
5491
5492 extern __inline __m512h
5493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_round_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C,const int __R)5494 _mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5495 __m512h __C, const int __R)
5496 {
5497 return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A,
5498 (__v32hf) __B,
5499 (__v32hf) __C,
5500 (__mmask32) __U, __R);
5501 }
5502
5503 #else
5504 #define _mm512_fmadd_round_ph(A, B, C, R) \
5505 ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R)))
5506
5507 #define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
5508 ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R)))
5509
5510 #define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
5511 ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R)))
5512
5513 #define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
5514 ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R)))
5515
5516 #endif /* __OPTIMIZE__ */
5517
5518 /* Intrinsics vfnmadd[132,213,231]ph. */
5519 extern __inline __m512h
5520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmadd_ph(__m512h __A,__m512h __B,__m512h __C)5521 _mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C)
5522 {
5523 return (__m512h)
5524 __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5525 (__v32hf) __B,
5526 (__v32hf) __C,
5527 (__mmask32) -1,
5528 _MM_FROUND_CUR_DIRECTION);
5529 }
5530
5531 extern __inline __m512h
5532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmadd_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C)5533 _mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5534 {
5535 return (__m512h)
5536 __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5537 (__v32hf) __B,
5538 (__v32hf) __C,
5539 (__mmask32) __U,
5540 _MM_FROUND_CUR_DIRECTION);
5541 }
5542
5543 extern __inline __m512h
5544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmadd_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U)5545 _mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5546 {
5547 return (__m512h)
5548 __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
5549 (__v32hf) __B,
5550 (__v32hf) __C,
5551 (__mmask32) __U,
5552 _MM_FROUND_CUR_DIRECTION);
5553 }
5554
5555 extern __inline __m512h
5556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmadd_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C)5557 _mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5558 {
5559 return (__m512h)
5560 __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
5561 (__v32hf) __B,
5562 (__v32hf) __C,
5563 (__mmask32) __U,
5564 _MM_FROUND_CUR_DIRECTION);
5565 }
5566
5567 #ifdef __OPTIMIZE__
5568 extern __inline __m512h
5569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmadd_round_ph(__m512h __A,__m512h __B,__m512h __C,const int __R)5570 _mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5571 {
5572 return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5573 (__v32hf) __B,
5574 (__v32hf) __C,
5575 (__mmask32) -1, __R);
5576 }
5577
5578 extern __inline __m512h
5579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmadd_round_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C,const int __R)5580 _mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5581 __m512h __C, const int __R)
5582 {
5583 return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A,
5584 (__v32hf) __B,
5585 (__v32hf) __C,
5586 (__mmask32) __U, __R);
5587 }
5588
5589 extern __inline __m512h
5590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmadd_round_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U,const int __R)5591 _mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C,
5592 __mmask32 __U, const int __R)
5593 {
5594 return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A,
5595 (__v32hf) __B,
5596 (__v32hf) __C,
5597 (__mmask32) __U, __R);
5598 }
5599
5600 extern __inline __m512h
5601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmadd_round_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C,const int __R)5602 _mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5603 __m512h __C, const int __R)
5604 {
5605 return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A,
5606 (__v32hf) __B,
5607 (__v32hf) __C,
5608 (__mmask32) __U, __R);
5609 }
5610
5611 #else
5612 #define _mm512_fnmadd_round_ph(A, B, C, R) \
5613 ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R)))
5614
5615 #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
5616 ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R)))
5617
5618 #define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
5619 ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R)))
5620
5621 #define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
5622 ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R)))
5623
5624 #endif /* __OPTIMIZE__ */
5625
5626 /* Intrinsics vfmsub[132,213,231]ph. */
5627 extern __inline __m512h
5628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_ph(__m512h __A,__m512h __B,__m512h __C)5629 _mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C)
5630 {
5631 return (__m512h)
5632 __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5633 (__v32hf) __B,
5634 (__v32hf) __C,
5635 (__mmask32) -1,
5636 _MM_FROUND_CUR_DIRECTION);
5637 }
5638
5639 extern __inline __m512h
5640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C)5641 _mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5642 {
5643 return (__m512h)
5644 __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5645 (__v32hf) __B,
5646 (__v32hf) __C,
5647 (__mmask32) __U,
5648 _MM_FROUND_CUR_DIRECTION);
5649 }
5650
5651 extern __inline __m512h
5652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U)5653 _mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5654 {
5655 return (__m512h)
5656 __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
5657 (__v32hf) __B,
5658 (__v32hf) __C,
5659 (__mmask32) __U,
5660 _MM_FROUND_CUR_DIRECTION);
5661 }
5662
5663 extern __inline __m512h
5664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C)5665 _mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5666 {
5667 return (__m512h)
5668 __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
5669 (__v32hf) __B,
5670 (__v32hf) __C,
5671 (__mmask32) __U,
5672 _MM_FROUND_CUR_DIRECTION);
5673 }
5674
5675 #ifdef __OPTIMIZE__
5676 extern __inline __m512h
5677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_round_ph(__m512h __A,__m512h __B,__m512h __C,const int __R)5678 _mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5679 {
5680 return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5681 (__v32hf) __B,
5682 (__v32hf) __C,
5683 (__mmask32) -1, __R);
5684 }
5685
5686 extern __inline __m512h
5687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_round_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C,const int __R)5688 _mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5689 __m512h __C, const int __R)
5690 {
5691 return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A,
5692 (__v32hf) __B,
5693 (__v32hf) __C,
5694 (__mmask32) __U, __R);
5695 }
5696
5697 extern __inline __m512h
5698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_round_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U,const int __R)5699 _mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
5700 __mmask32 __U, const int __R)
5701 {
5702 return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A,
5703 (__v32hf) __B,
5704 (__v32hf) __C,
5705 (__mmask32) __U, __R);
5706 }
5707
5708 extern __inline __m512h
5709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_round_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C,const int __R)5710 _mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5711 __m512h __C, const int __R)
5712 {
5713 return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A,
5714 (__v32hf) __B,
5715 (__v32hf) __C,
5716 (__mmask32) __U, __R);
5717 }
5718
5719 #else
5720 #define _mm512_fmsub_round_ph(A, B, C, R) \
5721 ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R)))
5722
5723 #define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
5724 ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R)))
5725
5726 #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
5727 ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R)))
5728
5729 #define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
5730 ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R)))
5731
5732 #endif /* __OPTIMIZE__ */
5733
5734 /* Intrinsics vfnmsub[132,213,231]ph. */
5735 extern __inline __m512h
5736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_ph(__m512h __A,__m512h __B,__m512h __C)5737 _mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C)
5738 {
5739 return (__m512h)
5740 __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5741 (__v32hf) __B,
5742 (__v32hf) __C,
5743 (__mmask32) -1,
5744 _MM_FROUND_CUR_DIRECTION);
5745 }
5746
5747 extern __inline __m512h
5748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmsub_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C)5749 _mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C)
5750 {
5751 return (__m512h)
5752 __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5753 (__v32hf) __B,
5754 (__v32hf) __C,
5755 (__mmask32) __U,
5756 _MM_FROUND_CUR_DIRECTION);
5757 }
5758
5759 extern __inline __m512h
5760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmsub_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U)5761 _mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U)
5762 {
5763 return (__m512h)
5764 __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
5765 (__v32hf) __B,
5766 (__v32hf) __C,
5767 (__mmask32) __U,
5768 _MM_FROUND_CUR_DIRECTION);
5769 }
5770
5771 extern __inline __m512h
5772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C)5773 _mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C)
5774 {
5775 return (__m512h)
5776 __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
5777 (__v32hf) __B,
5778 (__v32hf) __C,
5779 (__mmask32) __U,
5780 _MM_FROUND_CUR_DIRECTION);
5781 }
5782
5783 #ifdef __OPTIMIZE__
5784 extern __inline __m512h
5785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_round_ph(__m512h __A,__m512h __B,__m512h __C,const int __R)5786 _mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R)
5787 {
5788 return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5789 (__v32hf) __B,
5790 (__v32hf) __C,
5791 (__mmask32) -1, __R);
5792 }
5793
5794 extern __inline __m512h
5795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fnmsub_round_ph(__m512h __A,__mmask32 __U,__m512h __B,__m512h __C,const int __R)5796 _mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B,
5797 __m512h __C, const int __R)
5798 {
5799 return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A,
5800 (__v32hf) __B,
5801 (__v32hf) __C,
5802 (__mmask32) __U, __R);
5803 }
5804
5805 extern __inline __m512h
5806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fnmsub_round_ph(__m512h __A,__m512h __B,__m512h __C,__mmask32 __U,const int __R)5807 _mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C,
5808 __mmask32 __U, const int __R)
5809 {
5810 return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A,
5811 (__v32hf) __B,
5812 (__v32hf) __C,
5813 (__mmask32) __U, __R);
5814 }
5815
5816 extern __inline __m512h
5817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_round_ph(__mmask32 __U,__m512h __A,__m512h __B,__m512h __C,const int __R)5818 _mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
5819 __m512h __C, const int __R)
5820 {
5821 return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A,
5822 (__v32hf) __B,
5823 (__v32hf) __C,
5824 (__mmask32) __U, __R);
5825 }
5826
5827 #else
5828 #define _mm512_fnmsub_round_ph(A, B, C, R) \
5829 ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R)))
5830
5831 #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
5832 ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R)))
5833
5834 #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
5835 ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R)))
5836
5837 #define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
5838 ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R)))
5839
5840 #endif /* __OPTIMIZE__ */
5841
5842 /* Intrinsics vfmadd[132,213,231]sh. */
5843 extern __inline __m128h
5844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_sh(__m128h __W,__m128h __A,__m128h __B)5845 _mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
5846 {
5847 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5848 (__v8hf) __A,
5849 (__v8hf) __B,
5850 (__mmask8) -1,
5851 _MM_FROUND_CUR_DIRECTION);
5852 }
5853
5854 extern __inline __m128h
5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B)5856 _mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
5857 {
5858 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5859 (__v8hf) __A,
5860 (__v8hf) __B,
5861 (__mmask8) __U,
5862 _MM_FROUND_CUR_DIRECTION);
5863 }
5864
5865 extern __inline __m128h
5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_sh(__m128h __W,__m128h __A,__m128h __B,__mmask8 __U)5867 _mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
5868 {
5869 return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
5870 (__v8hf) __A,
5871 (__v8hf) __B,
5872 (__mmask8) __U,
5873 _MM_FROUND_CUR_DIRECTION);
5874 }
5875
5876 extern __inline __m128h
5877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_sh(__mmask8 __U,__m128h __W,__m128h __A,__m128h __B)5878 _mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
5879 {
5880 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
5881 (__v8hf) __A,
5882 (__v8hf) __B,
5883 (__mmask8) __U,
5884 _MM_FROUND_CUR_DIRECTION);
5885 }
5886
5887
5888 #ifdef __OPTIMIZE__
5889 extern __inline __m128h
5890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_sh(__m128h __W,__m128h __A,__m128h __B,const int __R)5891 _mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
5892 {
5893 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5894 (__v8hf) __A,
5895 (__v8hf) __B,
5896 (__mmask8) -1,
5897 __R);
5898 }
5899
5900 extern __inline __m128h
5901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_round_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B,const int __R)5902 _mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
5903 const int __R)
5904 {
5905 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
5906 (__v8hf) __A,
5907 (__v8hf) __B,
5908 (__mmask8) __U, __R);
5909 }
5910
5911 extern __inline __m128h
5912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_round_sh(__m128h __W,__m128h __A,__m128h __B,__mmask8 __U,const int __R)5913 _mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
5914 const int __R)
5915 {
5916 return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
5917 (__v8hf) __A,
5918 (__v8hf) __B,
5919 (__mmask8) __U, __R);
5920 }
5921
5922 extern __inline __m128h
5923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_round_sh(__mmask8 __U,__m128h __W,__m128h __A,__m128h __B,const int __R)5924 _mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
5925 __m128h __B, const int __R)
5926 {
5927 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
5928 (__v8hf) __A,
5929 (__v8hf) __B,
5930 (__mmask8) __U, __R);
5931 }
5932
5933 #else
5934 #define _mm_fmadd_round_sh(A, B, C, R) \
5935 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
5936 #define _mm_mask_fmadd_round_sh(A, U, B, C, R) \
5937 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
5938 #define _mm_mask3_fmadd_round_sh(A, B, C, U, R) \
5939 ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
5940 #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
5941 ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
5942
5943 #endif /* __OPTIMIZE__ */
5944
5945 /* Intrinsics vfnmadd[132,213,231]sh. */
5946 extern __inline __m128h
5947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_sh(__m128h __W,__m128h __A,__m128h __B)5948 _mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
5949 {
5950 return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
5951 (__v8hf) __A,
5952 (__v8hf) __B,
5953 (__mmask8) -1,
5954 _MM_FROUND_CUR_DIRECTION);
5955 }
5956
5957 extern __inline __m128h
5958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B)5959 _mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
5960 {
5961 return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
5962 (__v8hf) __A,
5963 (__v8hf) __B,
5964 (__mmask8) __U,
5965 _MM_FROUND_CUR_DIRECTION);
5966 }
5967
5968 extern __inline __m128h
5969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_sh(__m128h __W,__m128h __A,__m128h __B,__mmask8 __U)5970 _mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
5971 {
5972 return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
5973 (__v8hf) __A,
5974 (__v8hf) __B,
5975 (__mmask8) __U,
5976 _MM_FROUND_CUR_DIRECTION);
5977 }
5978
5979 extern __inline __m128h
5980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_sh(__mmask8 __U,__m128h __W,__m128h __A,__m128h __B)5981 _mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
5982 {
5983 return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
5984 (__v8hf) __A,
5985 (__v8hf) __B,
5986 (__mmask8) __U,
5987 _MM_FROUND_CUR_DIRECTION);
5988 }
5989
5990
5991 #ifdef __OPTIMIZE__
5992 extern __inline __m128h
5993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_round_sh(__m128h __W,__m128h __A,__m128h __B,const int __R)5994 _mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
5995 {
5996 return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
5997 (__v8hf) __A,
5998 (__v8hf) __B,
5999 (__mmask8) -1,
6000 __R);
6001 }
6002
6003 extern __inline __m128h
6004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmadd_round_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B,const int __R)6005 _mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
6006 const int __R)
6007 {
6008 return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
6009 (__v8hf) __A,
6010 (__v8hf) __B,
6011 (__mmask8) __U, __R);
6012 }
6013
6014 extern __inline __m128h
6015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmadd_round_sh(__m128h __W,__m128h __A,__m128h __B,__mmask8 __U,const int __R)6016 _mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
6017 const int __R)
6018 {
6019 return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
6020 (__v8hf) __A,
6021 (__v8hf) __B,
6022 (__mmask8) __U, __R);
6023 }
6024
6025 extern __inline __m128h
6026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmadd_round_sh(__mmask8 __U,__m128h __W,__m128h __A,__m128h __B,const int __R)6027 _mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
6028 __m128h __B, const int __R)
6029 {
6030 return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
6031 (__v8hf) __A,
6032 (__v8hf) __B,
6033 (__mmask8) __U, __R);
6034 }
6035
6036 #else
6037 #define _mm_fnmadd_round_sh(A, B, C, R) \
6038 ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
6039 #define _mm_mask_fnmadd_round_sh(A, U, B, C, R) \
6040 ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
6041 #define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) \
6042 ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
6043 #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
6044 ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
6045
6046 #endif /* __OPTIMIZE__ */
6047
6048 /* Intrinsics vfmsub[132,213,231]sh. */
6049 extern __inline __m128h
6050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_sh(__m128h __W,__m128h __A,__m128h __B)6051 _mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
6052 {
6053 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6054 (__v8hf) __A,
6055 -(__v8hf) __B,
6056 (__mmask8) -1,
6057 _MM_FROUND_CUR_DIRECTION);
6058 }
6059
6060 extern __inline __m128h
6061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B)6062 _mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
6063 {
6064 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6065 (__v8hf) __A,
6066 -(__v8hf) __B,
6067 (__mmask8) __U,
6068 _MM_FROUND_CUR_DIRECTION);
6069 }
6070
6071 extern __inline __m128h
6072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_sh(__m128h __W,__m128h __A,__m128h __B,__mmask8 __U)6073 _mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
6074 {
6075 return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6076 (__v8hf) __A,
6077 (__v8hf) __B,
6078 (__mmask8) __U,
6079 _MM_FROUND_CUR_DIRECTION);
6080 }
6081
6082 extern __inline __m128h
6083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_sh(__mmask8 __U,__m128h __W,__m128h __A,__m128h __B)6084 _mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
6085 {
6086 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6087 (__v8hf) __A,
6088 -(__v8hf) __B,
6089 (__mmask8) __U,
6090 _MM_FROUND_CUR_DIRECTION);
6091 }
6092
6093
6094 #ifdef __OPTIMIZE__
6095 extern __inline __m128h
6096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_round_sh(__m128h __W,__m128h __A,__m128h __B,const int __R)6097 _mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
6098 {
6099 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6100 (__v8hf) __A,
6101 -(__v8hf) __B,
6102 (__mmask8) -1,
6103 __R);
6104 }
6105
6106 extern __inline __m128h
6107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_round_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B,const int __R)6108 _mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
6109 const int __R)
6110 {
6111 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6112 (__v8hf) __A,
6113 -(__v8hf) __B,
6114 (__mmask8) __U, __R);
6115 }
6116
6117 extern __inline __m128h
6118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmsub_round_sh(__m128h __W,__m128h __A,__m128h __B,__mmask8 __U,const int __R)6119 _mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
6120 const int __R)
6121 {
6122 return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6123 (__v8hf) __A,
6124 (__v8hf) __B,
6125 (__mmask8) __U, __R);
6126 }
6127
6128 extern __inline __m128h
6129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_round_sh(__mmask8 __U,__m128h __W,__m128h __A,__m128h __B,const int __R)6130 _mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
6131 __m128h __B, const int __R)
6132 {
6133 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6134 (__v8hf) __A,
6135 -(__v8hf) __B,
6136 (__mmask8) __U, __R);
6137 }
6138
6139 #else
6140 #define _mm_fmsub_round_sh(A, B, C, R) \
6141 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
6142 #define _mm_mask_fmsub_round_sh(A, U, B, C, R) \
6143 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
6144 #define _mm_mask3_fmsub_round_sh(A, B, C, U, R) \
6145 ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
6146 #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
6147 ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
6148
6149 #endif /* __OPTIMIZE__ */
6150
6151 /* Intrinsics vfnmsub[132,213,231]sh. */
6152 extern __inline __m128h
6153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_sh(__m128h __W,__m128h __A,__m128h __B)6154 _mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
6155 {
6156 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6157 -(__v8hf) __A,
6158 -(__v8hf) __B,
6159 (__mmask8) -1,
6160 _MM_FROUND_CUR_DIRECTION);
6161 }
6162
6163 extern __inline __m128h
6164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B)6165 _mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
6166 {
6167 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6168 -(__v8hf) __A,
6169 -(__v8hf) __B,
6170 (__mmask8) __U,
6171 _MM_FROUND_CUR_DIRECTION);
6172 }
6173
6174 extern __inline __m128h
6175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_sh(__m128h __W,__m128h __A,__m128h __B,__mmask8 __U)6176 _mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
6177 {
6178 return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6179 -(__v8hf) __A,
6180 (__v8hf) __B,
6181 (__mmask8) __U,
6182 _MM_FROUND_CUR_DIRECTION);
6183 }
6184
6185 extern __inline __m128h
6186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_sh(__mmask8 __U,__m128h __W,__m128h __A,__m128h __B)6187 _mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
6188 {
6189 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6190 -(__v8hf) __A,
6191 -(__v8hf) __B,
6192 (__mmask8) __U,
6193 _MM_FROUND_CUR_DIRECTION);
6194 }
6195
6196
6197 #ifdef __OPTIMIZE__
6198 extern __inline __m128h
6199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_round_sh(__m128h __W,__m128h __A,__m128h __B,const int __R)6200 _mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
6201 {
6202 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6203 -(__v8hf) __A,
6204 -(__v8hf) __B,
6205 (__mmask8) -1,
6206 __R);
6207 }
6208
6209 extern __inline __m128h
6210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fnmsub_round_sh(__m128h __W,__mmask8 __U,__m128h __A,__m128h __B,const int __R)6211 _mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
6212 const int __R)
6213 {
6214 return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
6215 -(__v8hf) __A,
6216 -(__v8hf) __B,
6217 (__mmask8) __U, __R);
6218 }
6219
6220 extern __inline __m128h
6221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fnmsub_round_sh(__m128h __W,__m128h __A,__m128h __B,__mmask8 __U,const int __R)6222 _mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
6223 const int __R)
6224 {
6225 return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
6226 -(__v8hf) __A,
6227 (__v8hf) __B,
6228 (__mmask8) __U, __R);
6229 }
6230
6231 extern __inline __m128h
6232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_round_sh(__mmask8 __U,__m128h __W,__m128h __A,__m128h __B,const int __R)6233 _mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
6234 __m128h __B, const int __R)
6235 {
6236 return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
6237 -(__v8hf) __A,
6238 -(__v8hf) __B,
6239 (__mmask8) __U, __R);
6240 }
6241
6242 #else
6243 #define _mm_fnmsub_round_sh(A, B, C, R) \
6244 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
6245 #define _mm_mask_fnmsub_round_sh(A, U, B, C, R) \
6246 ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
6247 #define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) \
6248 ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
6249 #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
6250 ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
6251
6252 #endif /* __OPTIMIZE__ */
6253
6254 /* Intrinsics vf[,c]maddcph. */
6255 extern __inline __m512h
6256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fcmadd_pch(__m512h __A,__m512h __B,__m512h __C)6257 _mm512_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C)
6258 {
6259 return (__m512h)
6260 __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
6261 (__v32hf) __B,
6262 (__v32hf) __C,
6263 _MM_FROUND_CUR_DIRECTION);
6264 }
6265
6266 extern __inline __m512h
6267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fcmadd_pch(__m512h __A,__mmask16 __B,__m512h __C,__m512h __D)6268 _mm512_mask_fcmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6269 {
6270 return (__m512h)
6271 __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
6272 (__v32hf) __C,
6273 (__v32hf) __D, __B,
6274 _MM_FROUND_CUR_DIRECTION);
6275 }
6276
6277 extern __inline __m512h
6278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fcmadd_pch(__m512h __A,__m512h __B,__m512h __C,__mmask16 __D)6279 _mm512_mask3_fcmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
6280 {
6281 return (__m512h)
6282 __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
6283 (__v32hf) __B,
6284 (__v32hf) __C,
6285 __D, _MM_FROUND_CUR_DIRECTION);
6286 }
6287
6288 extern __inline __m512h
6289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fcmadd_pch(__mmask16 __A,__m512h __B,__m512h __C,__m512h __D)6290 _mm512_maskz_fcmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
6291 {
6292 return (__m512h)
6293 __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
6294 (__v32hf) __C,
6295 (__v32hf) __D,
6296 __A, _MM_FROUND_CUR_DIRECTION);
6297 }
6298
6299 extern __inline __m512h
6300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_pch(__m512h __A,__m512h __B,__m512h __C)6301 _mm512_fmadd_pch (__m512h __A, __m512h __B, __m512h __C)
6302 {
6303 return (__m512h)
6304 __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
6305 (__v32hf) __B,
6306 (__v32hf) __C,
6307 _MM_FROUND_CUR_DIRECTION);
6308 }
6309
6310 extern __inline __m512h
6311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_pch(__m512h __A,__mmask16 __B,__m512h __C,__m512h __D)6312 _mm512_mask_fmadd_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6313 {
6314 return (__m512h)
6315 __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
6316 (__v32hf) __C,
6317 (__v32hf) __D, __B,
6318 _MM_FROUND_CUR_DIRECTION);
6319 }
6320
6321 extern __inline __m512h
6322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_pch(__m512h __A,__m512h __B,__m512h __C,__mmask16 __D)6323 _mm512_mask3_fmadd_pch (__m512h __A, __m512h __B, __m512h __C, __mmask16 __D)
6324 {
6325 return (__m512h)
6326 __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
6327 (__v32hf) __B,
6328 (__v32hf) __C,
6329 __D, _MM_FROUND_CUR_DIRECTION);
6330 }
6331
6332 extern __inline __m512h
6333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_pch(__mmask16 __A,__m512h __B,__m512h __C,__m512h __D)6334 _mm512_maskz_fmadd_pch (__mmask16 __A, __m512h __B, __m512h __C, __m512h __D)
6335 {
6336 return (__m512h)
6337 __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
6338 (__v32hf) __C,
6339 (__v32hf) __D,
6340 __A, _MM_FROUND_CUR_DIRECTION);
6341 }
6342
6343 #ifdef __OPTIMIZE__
6344 extern __inline __m512h
6345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fcmadd_round_pch(__m512h __A,__m512h __B,__m512h __C,const int __D)6346 _mm512_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
6347 {
6348 return (__m512h)
6349 __builtin_ia32_vfcmaddcph512_round ((__v32hf) __A,
6350 (__v32hf) __B,
6351 (__v32hf) __C,
6352 __D);
6353 }
6354
6355 extern __inline __m512h
6356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fcmadd_round_pch(__m512h __A,__mmask16 __B,__m512h __C,__m512h __D,const int __E)6357 _mm512_mask_fcmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6358 __m512h __D, const int __E)
6359 {
6360 return (__m512h)
6361 __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) __A,
6362 (__v32hf) __C,
6363 (__v32hf) __D, __B,
6364 __E);
6365 }
6366
6367 extern __inline __m512h
6368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fcmadd_round_pch(__m512h __A,__m512h __B,__m512h __C,__mmask16 __D,const int __E)6369 _mm512_mask3_fcmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
6370 __mmask16 __D, const int __E)
6371 {
6372 return (__m512h)
6373 __builtin_ia32_vfcmaddcph512_mask3_round ((__v32hf) __A,
6374 (__v32hf) __B,
6375 (__v32hf) __C,
6376 __D, __E);
6377 }
6378
6379 extern __inline __m512h
6380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fcmadd_round_pch(__mmask16 __A,__m512h __B,__m512h __C,__m512h __D,const int __E)6381 _mm512_maskz_fcmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
6382 __m512h __D, const int __E)
6383 {
6384 return (__m512h)
6385 __builtin_ia32_vfcmaddcph512_maskz_round ((__v32hf) __B,
6386 (__v32hf) __C,
6387 (__v32hf) __D,
6388 __A, __E);
6389 }
6390
6391 extern __inline __m512h
6392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_round_pch(__m512h __A,__m512h __B,__m512h __C,const int __D)6393 _mm512_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C, const int __D)
6394 {
6395 return (__m512h)
6396 __builtin_ia32_vfmaddcph512_round ((__v32hf) __A,
6397 (__v32hf) __B,
6398 (__v32hf) __C,
6399 __D);
6400 }
6401
6402 extern __inline __m512h
6403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_round_pch(__m512h __A,__mmask16 __B,__m512h __C,__m512h __D,const int __E)6404 _mm512_mask_fmadd_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6405 __m512h __D, const int __E)
6406 {
6407 return (__m512h)
6408 __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) __A,
6409 (__v32hf) __C,
6410 (__v32hf) __D, __B,
6411 __E);
6412 }
6413
6414 extern __inline __m512h
6415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_round_pch(__m512h __A,__m512h __B,__m512h __C,__mmask16 __D,const int __E)6416 _mm512_mask3_fmadd_round_pch (__m512h __A, __m512h __B, __m512h __C,
6417 __mmask16 __D, const int __E)
6418 {
6419 return (__m512h)
6420 __builtin_ia32_vfmaddcph512_mask3_round ((__v32hf) __A,
6421 (__v32hf) __B,
6422 (__v32hf) __C,
6423 __D, __E);
6424 }
6425
6426 extern __inline __m512h
6427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_round_pch(__mmask16 __A,__m512h __B,__m512h __C,__m512h __D,const int __E)6428 _mm512_maskz_fmadd_round_pch (__mmask16 __A, __m512h __B, __m512h __C,
6429 __m512h __D, const int __E)
6430 {
6431 return (__m512h)
6432 __builtin_ia32_vfmaddcph512_maskz_round ((__v32hf) __B,
6433 (__v32hf) __C,
6434 (__v32hf) __D,
6435 __A, __E);
6436 }
6437
6438 #else
6439 #define _mm512_fcmadd_round_pch(A, B, C, D) \
6440 (__m512h) __builtin_ia32_vfcmaddcph512_round ((A), (B), (C), (D))
6441
6442 #define _mm512_mask_fcmadd_round_pch(A, B, C, D, E) \
6443 ((__m512h) \
6444 __builtin_ia32_vfcmaddcph512_mask_round ((__v32hf) (A), \
6445 (__v32hf) (C), \
6446 (__v32hf) (D), \
6447 (B), (E)))
6448
6449
6450 #define _mm512_mask3_fcmadd_round_pch(A, B, C, D, E) \
6451 ((__m512h) \
6452 __builtin_ia32_vfcmaddcph512_mask3_round ((A), (B), (C), (D), (E)))
6453
6454 #define _mm512_maskz_fcmadd_round_pch(A, B, C, D, E) \
6455 (__m512h) \
6456 __builtin_ia32_vfcmaddcph512_maskz_round ((B), (C), (D), (A), (E))
6457
6458 #define _mm512_fmadd_round_pch(A, B, C, D) \
6459 (__m512h) __builtin_ia32_vfmaddcph512_round ((A), (B), (C), (D))
6460
6461 #define _mm512_mask_fmadd_round_pch(A, B, C, D, E) \
6462 ((__m512h) \
6463 __builtin_ia32_vfmaddcph512_mask_round ((__v32hf) (A), \
6464 (__v32hf) (C), \
6465 (__v32hf) (D), \
6466 (B), (E)))
6467
6468 #define _mm512_mask3_fmadd_round_pch(A, B, C, D, E) \
6469 (__m512h) \
6470 __builtin_ia32_vfmaddcph512_mask3_round ((A), (B), (C), (D), (E))
6471
6472 #define _mm512_maskz_fmadd_round_pch(A, B, C, D, E) \
6473 (__m512h) \
6474 __builtin_ia32_vfmaddcph512_maskz_round ((B), (C), (D), (A), (E))
6475
6476 #endif /* __OPTIMIZE__ */
6477
6478 /* Intrinsics vf[,c]mulcph. */
6479 extern __inline __m512h
6480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fcmul_pch(__m512h __A,__m512h __B)6481 _mm512_fcmul_pch (__m512h __A, __m512h __B)
6482 {
6483 return (__m512h)
6484 __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
6485 (__v32hf) __B,
6486 _MM_FROUND_CUR_DIRECTION);
6487 }
6488
6489 extern __inline __m512h
6490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fcmul_pch(__m512h __A,__mmask16 __B,__m512h __C,__m512h __D)6491 _mm512_mask_fcmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6492 {
6493 return (__m512h)
6494 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
6495 (__v32hf) __D,
6496 (__v32hf) __A,
6497 __B, _MM_FROUND_CUR_DIRECTION);
6498 }
6499
6500 extern __inline __m512h
6501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fcmul_pch(__mmask16 __A,__m512h __B,__m512h __C)6502 _mm512_maskz_fcmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
6503 {
6504 return (__m512h)
6505 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
6506 (__v32hf) __C,
6507 _mm512_setzero_ph (),
6508 __A, _MM_FROUND_CUR_DIRECTION);
6509 }
6510
6511 extern __inline __m512h
6512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmul_pch(__m512h __A,__m512h __B)6513 _mm512_fmul_pch (__m512h __A, __m512h __B)
6514 {
6515 return (__m512h)
6516 __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
6517 (__v32hf) __B,
6518 _MM_FROUND_CUR_DIRECTION);
6519 }
6520
6521 extern __inline __m512h
6522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmul_pch(__m512h __A,__mmask16 __B,__m512h __C,__m512h __D)6523 _mm512_mask_fmul_pch (__m512h __A, __mmask16 __B, __m512h __C, __m512h __D)
6524 {
6525 return (__m512h)
6526 __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
6527 (__v32hf) __D,
6528 (__v32hf) __A,
6529 __B, _MM_FROUND_CUR_DIRECTION);
6530 }
6531
6532 extern __inline __m512h
6533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmul_pch(__mmask16 __A,__m512h __B,__m512h __C)6534 _mm512_maskz_fmul_pch (__mmask16 __A, __m512h __B, __m512h __C)
6535 {
6536 return (__m512h)
6537 __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
6538 (__v32hf) __C,
6539 _mm512_setzero_ph (),
6540 __A, _MM_FROUND_CUR_DIRECTION);
6541 }
6542
6543 #ifdef __OPTIMIZE__
6544 extern __inline __m512h
6545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fcmul_round_pch(__m512h __A,__m512h __B,const int __D)6546 _mm512_fcmul_round_pch (__m512h __A, __m512h __B, const int __D)
6547 {
6548 return (__m512h)
6549 __builtin_ia32_vfcmulcph512_round ((__v32hf) __A,
6550 (__v32hf) __B, __D);
6551 }
6552
6553 extern __inline __m512h
6554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fcmul_round_pch(__m512h __A,__mmask16 __B,__m512h __C,__m512h __D,const int __E)6555 _mm512_mask_fcmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6556 __m512h __D, const int __E)
6557 {
6558 return (__m512h)
6559 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __C,
6560 (__v32hf) __D,
6561 (__v32hf) __A,
6562 __B, __E);
6563 }
6564
6565 extern __inline __m512h
6566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fcmul_round_pch(__mmask16 __A,__m512h __B,__m512h __C,const int __E)6567 _mm512_maskz_fcmul_round_pch (__mmask16 __A, __m512h __B,
6568 __m512h __C, const int __E)
6569 {
6570 return (__m512h)
6571 __builtin_ia32_vfcmulcph512_mask_round ((__v32hf) __B,
6572 (__v32hf) __C,
6573 _mm512_setzero_ph (),
6574 __A, __E);
6575 }
6576
6577 extern __inline __m512h
6578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmul_round_pch(__m512h __A,__m512h __B,const int __D)6579 _mm512_fmul_round_pch (__m512h __A, __m512h __B, const int __D)
6580 {
6581 return (__m512h)
6582 __builtin_ia32_vfmulcph512_round ((__v32hf) __A,
6583 (__v32hf) __B,
6584 __D);
6585 }
6586
6587 extern __inline __m512h
6588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmul_round_pch(__m512h __A,__mmask16 __B,__m512h __C,__m512h __D,const int __E)6589 _mm512_mask_fmul_round_pch (__m512h __A, __mmask16 __B, __m512h __C,
6590 __m512h __D, const int __E)
6591 {
6592 return (__m512h)
6593 __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __C,
6594 (__v32hf) __D,
6595 (__v32hf) __A,
6596 __B, __E);
6597 }
6598
6599 extern __inline __m512h
6600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmul_round_pch(__mmask16 __A,__m512h __B,__m512h __C,const int __E)6601 _mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B,
6602 __m512h __C, const int __E)
6603 {
6604 return (__m512h)
6605 __builtin_ia32_vfmulcph512_mask_round ((__v32hf) __B,
6606 (__v32hf) __C,
6607 _mm512_setzero_ph (),
6608 __A, __E);
6609 }
6610
6611 #else
6612 #define _mm512_fcmul_round_pch(A, B, D) \
6613 (__m512h) __builtin_ia32_vfcmulcph512_round ((A), (B), (D))
6614
6615 #define _mm512_mask_fcmul_round_pch(A, B, C, D, E) \
6616 (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((C), (D), (A), (B), (E))
6617
6618 #define _mm512_maskz_fcmul_round_pch(A, B, C, E) \
6619 (__m512h) __builtin_ia32_vfcmulcph512_mask_round ((B), (C), \
6620 (__v32hf) \
6621 _mm512_setzero_ph (), \
6622 (A), (E))
6623
6624 #define _mm512_fmul_round_pch(A, B, D) \
6625 (__m512h) __builtin_ia32_vfmulcph512_round ((A), (B), (D))
6626
6627 #define _mm512_mask_fmul_round_pch(A, B, C, D, E) \
6628 (__m512h) __builtin_ia32_vfmulcph512_mask_round ((C), (D), (A), (B), (E))
6629
6630 #define _mm512_maskz_fmul_round_pch(A, B, C, E) \
6631 (__m512h) __builtin_ia32_vfmulcph512_mask_round ((B), (C), \
6632 (__v32hf) \
6633 _mm512_setzero_ph (), \
6634 (A), (E))
6635
6636 #endif /* __OPTIMIZE__ */
6637
6638 /* Intrinsics vf[,c]maddcsh. */
6639 extern __inline __m128h
6640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmadd_sch(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)6641 _mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6642 {
6643 return (__m128h)
6644 __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
6645 (__v8hf) __C,
6646 (__v8hf) __D, __B,
6647 _MM_FROUND_CUR_DIRECTION);
6648 }
6649
6650 extern __inline __m128h
6651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fcmadd_sch(__m128h __A,__m128h __B,__m128h __C,__mmask8 __D)6652 _mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
6653 {
6654 return (__m128h)
6655 __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
6656 (__v8hf) __B,
6657 (__v8hf) __C, __D,
6658 _MM_FROUND_CUR_DIRECTION);
6659 }
6660
6661 extern __inline __m128h
6662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmadd_sch(__mmask8 __A,__m128h __B,__m128h __C,__m128h __D)6663 _mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
6664 {
6665 return (__m128h)
6666 __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
6667 (__v8hf) __C,
6668 (__v8hf) __D,
6669 __A, _MM_FROUND_CUR_DIRECTION);
6670 }
6671
6672 extern __inline __m128h
6673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmadd_sch(__m128h __A,__m128h __B,__m128h __C)6674 _mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
6675 {
6676 return (__m128h)
6677 __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
6678 (__v8hf) __B,
6679 (__v8hf) __C,
6680 _MM_FROUND_CUR_DIRECTION);
6681 }
6682
6683 extern __inline __m128h
6684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_sch(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)6685 _mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6686 {
6687 return (__m128h)
6688 __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
6689 (__v8hf) __C,
6690 (__v8hf) __D, __B,
6691 _MM_FROUND_CUR_DIRECTION);
6692 }
6693
6694 extern __inline __m128h
6695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_sch(__m128h __A,__m128h __B,__m128h __C,__mmask8 __D)6696 _mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
6697 {
6698 return (__m128h)
6699 __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
6700 (__v8hf) __B,
6701 (__v8hf) __C, __D,
6702 _MM_FROUND_CUR_DIRECTION);
6703 }
6704
6705 extern __inline __m128h
6706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_sch(__mmask8 __A,__m128h __B,__m128h __C,__m128h __D)6707 _mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
6708 {
6709 return (__m128h)
6710 __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
6711 (__v8hf) __C,
6712 (__v8hf) __D,
6713 __A, _MM_FROUND_CUR_DIRECTION);
6714 }
6715
6716 extern __inline __m128h
6717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_sch(__m128h __A,__m128h __B,__m128h __C)6718 _mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
6719 {
6720 return (__m128h)
6721 __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
6722 (__v8hf) __B,
6723 (__v8hf) __C,
6724 _MM_FROUND_CUR_DIRECTION);
6725 }
6726
6727 #ifdef __OPTIMIZE__
6728 extern __inline __m128h
6729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmadd_round_sch(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)6730 _mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6731 __m128h __D, const int __E)
6732 {
6733 return (__m128h)
6734 __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
6735 (__v8hf) __C,
6736 (__v8hf) __D,
6737 __B, __E);
6738 }
6739
6740 extern __inline __m128h
6741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fcmadd_round_sch(__m128h __A,__m128h __B,__m128h __C,__mmask8 __D,const int __E)6742 _mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
6743 __mmask8 __D, const int __E)
6744 {
6745 return (__m128h)
6746 __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
6747 (__v8hf) __B,
6748 (__v8hf) __C,
6749 __D, __E);
6750 }
6751
6752 extern __inline __m128h
6753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmadd_round_sch(__mmask8 __A,__m128h __B,__m128h __C,__m128h __D,const int __E)6754 _mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
6755 __m128h __D, const int __E)
6756 {
6757 return (__m128h)
6758 __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
6759 (__v8hf) __C,
6760 (__v8hf) __D,
6761 __A, __E);
6762 }
6763
6764 extern __inline __m128h
6765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmadd_round_sch(__m128h __A,__m128h __B,__m128h __C,const int __D)6766 _mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
6767 {
6768 return (__m128h)
6769 __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
6770 (__v8hf) __B,
6771 (__v8hf) __C,
6772 __D);
6773 }
6774
6775 extern __inline __m128h
6776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmadd_round_sch(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)6777 _mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6778 __m128h __D, const int __E)
6779 {
6780 return (__m128h)
6781 __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
6782 (__v8hf) __C,
6783 (__v8hf) __D,
6784 __B, __E);
6785 }
6786
6787 extern __inline __m128h
6788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask3_fmadd_round_sch(__m128h __A,__m128h __B,__m128h __C,__mmask8 __D,const int __E)6789 _mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
6790 __mmask8 __D, const int __E)
6791 {
6792 return (__m128h)
6793 __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
6794 (__v8hf) __B,
6795 (__v8hf) __C,
6796 __D, __E);
6797 }
6798
6799 extern __inline __m128h
6800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmadd_round_sch(__mmask8 __A,__m128h __B,__m128h __C,__m128h __D,const int __E)6801 _mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
6802 __m128h __D, const int __E)
6803 {
6804 return (__m128h)
6805 __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
6806 (__v8hf) __C,
6807 (__v8hf) __D,
6808 __A, __E);
6809 }
6810
6811 extern __inline __m128h
6812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_round_sch(__m128h __A,__m128h __B,__m128h __C,const int __D)6813 _mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
6814 {
6815 return (__m128h)
6816 __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
6817 (__v8hf) __B,
6818 (__v8hf) __C,
6819 __D);
6820 }
6821 #else
6822 #define _mm_mask_fcmadd_round_sch(A, B, C, D, E) \
6823 ((__m128h) \
6824 __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), \
6825 (__v8hf) (C), \
6826 (__v8hf) (D), \
6827 (B), (E)))
6828
6829
6830 #define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) \
6831 ((__m128h) \
6832 __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), \
6833 (__v8hf) (B), \
6834 (__v8hf) (C), \
6835 (D), (E)))
6836
6837 #define _mm_maskz_fcmadd_round_sch(A, B, C, D, E) \
6838 __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E))
6839
6840 #define _mm_fcmadd_round_sch(A, B, C, D) \
6841 __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D))
6842
6843 #define _mm_mask_fmadd_round_sch(A, B, C, D, E) \
6844 ((__m128h) \
6845 __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), \
6846 (__v8hf) (C), \
6847 (__v8hf) (D), \
6848 (B), (E)))
6849
6850 #define _mm_mask3_fmadd_round_sch(A, B, C, D, E) \
6851 ((__m128h) \
6852 __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), \
6853 (__v8hf) (B), \
6854 (__v8hf) (C), \
6855 (D), (E)))
6856
6857 #define _mm_maskz_fmadd_round_sch(A, B, C, D, E) \
6858 __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E))
6859
6860 #define _mm_fmadd_round_sch(A, B, C, D) \
6861 __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D))
6862
6863 #endif /* __OPTIMIZE__ */
6864
6865 /* Intrinsics vf[,c]mulcsh. */
6866 extern __inline __m128h
6867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmul_sch(__m128h __A,__m128h __B)6868 _mm_fcmul_sch (__m128h __A, __m128h __B)
6869 {
6870 return (__m128h)
6871 __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
6872 (__v8hf) __B,
6873 _MM_FROUND_CUR_DIRECTION);
6874 }
6875
6876 extern __inline __m128h
6877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmul_sch(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)6878 _mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6879 {
6880 return (__m128h)
6881 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
6882 (__v8hf) __D,
6883 (__v8hf) __A,
6884 __B, _MM_FROUND_CUR_DIRECTION);
6885 }
6886
6887 extern __inline __m128h
6888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmul_sch(__mmask8 __A,__m128h __B,__m128h __C)6889 _mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
6890 {
6891 return (__m128h)
6892 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
6893 (__v8hf) __C,
6894 _mm_setzero_ph (),
6895 __A, _MM_FROUND_CUR_DIRECTION);
6896 }
6897
6898 extern __inline __m128h
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmul_sch(__m128h __A,__m128h __B)6900 _mm_fmul_sch (__m128h __A, __m128h __B)
6901 {
6902 return (__m128h)
6903 __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
6904 (__v8hf) __B,
6905 _MM_FROUND_CUR_DIRECTION);
6906 }
6907
6908 extern __inline __m128h
6909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmul_sch(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D)6910 _mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
6911 {
6912 return (__m128h)
6913 __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
6914 (__v8hf) __D,
6915 (__v8hf) __A,
6916 __B, _MM_FROUND_CUR_DIRECTION);
6917 }
6918
6919 extern __inline __m128h
6920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmul_sch(__mmask8 __A,__m128h __B,__m128h __C)6921 _mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
6922 {
6923 return (__m128h)
6924 __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
6925 (__v8hf) __C,
6926 _mm_setzero_ph (),
6927 __A, _MM_FROUND_CUR_DIRECTION);
6928 }
6929
6930 #ifdef __OPTIMIZE__
6931 extern __inline __m128h
6932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fcmul_round_sch(__m128h __A,__m128h __B,const int __D)6933 _mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
6934 {
6935 return (__m128h)
6936 __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
6937 (__v8hf) __B,
6938 __D);
6939 }
6940
6941 extern __inline __m128h
6942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fcmul_round_sch(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)6943 _mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6944 __m128h __D, const int __E)
6945 {
6946 return (__m128h)
6947 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
6948 (__v8hf) __D,
6949 (__v8hf) __A,
6950 __B, __E);
6951 }
6952
6953 extern __inline __m128h
6954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fcmul_round_sch(__mmask8 __A,__m128h __B,__m128h __C,const int __E)6955 _mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
6956 const int __E)
6957 {
6958 return (__m128h)
6959 __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
6960 (__v8hf) __C,
6961 _mm_setzero_ph (),
6962 __A, __E);
6963 }
6964
6965 extern __inline __m128h
6966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmul_round_sch(__m128h __A,__m128h __B,const int __D)6967 _mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
6968 {
6969 return (__m128h)
6970 __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
6971 (__v8hf) __B, __D);
6972 }
6973
6974 extern __inline __m128h
6975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmul_round_sch(__m128h __A,__mmask8 __B,__m128h __C,__m128h __D,const int __E)6976 _mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
6977 __m128h __D, const int __E)
6978 {
6979 return (__m128h)
6980 __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
6981 (__v8hf) __D,
6982 (__v8hf) __A,
6983 __B, __E);
6984 }
6985
6986 extern __inline __m128h
6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmul_round_sch(__mmask8 __A,__m128h __B,__m128h __C,const int __E)6988 _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
6989 {
6990 return (__m128h)
6991 __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
6992 (__v8hf) __C,
6993 _mm_setzero_ph (),
6994 __A, __E);
6995 }
6996
6997 #else
6998 #define _mm_fcmul_round_sch(__A, __B, __D) \
6999 (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, \
7000 (__v8hf) __B, __D)
7001
7002 #define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E) \
7003 (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, \
7004 (__v8hf) __D, \
7005 (__v8hf) __A, \
7006 __B, __E)
7007
7008 #define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E) \
7009 (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, \
7010 (__v8hf) __C, \
7011 _mm_setzero_ph (), \
7012 __A, __E)
7013
7014 #define _mm_fmul_round_sch(__A, __B, __D) \
7015 (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A, \
7016 (__v8hf) __B, __D)
7017
7018 #define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E) \
7019 (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, \
7020 (__v8hf) __D, \
7021 (__v8hf) __A, \
7022 __B, __E)
7023
7024 #define _mm_maskz_fmul_round_sch(__A, __B, __C, __E) \
7025 (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, \
7026 (__v8hf) __C, \
7027 _mm_setzero_ph (), \
7028 __A, __E)
7029
7030 #endif /* __OPTIMIZE__ */
7031
7032 #define _MM512_REDUCE_OP(op) \
7033 __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
7034 __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
7035 __m256h __T3 = (__T1 op __T2); \
7036 __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
7037 __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
7038 __m128h __T6 = (__T4 op __T5); \
7039 __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
7040 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
7041 __m128h __T8 = (__T6 op __T7); \
7042 __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
7043 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \
7044 __m128h __T10 = __T8 op __T9; \
7045 return __T10[0] op __T10[1]
7046
7047 // TODO reduce
7048 extern __inline _Float16
7049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_ph(__m512h __A)7050 _mm512_reduce_add_ph (__m512h __A)
7051 {
7052 _MM512_REDUCE_OP (+);
7053 }
7054
7055 extern __inline _Float16
7056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_ph(__m512h __A)7057 _mm512_reduce_mul_ph (__m512h __A)
7058 {
7059 _MM512_REDUCE_OP (*);
7060 }
7061
7062 #undef _MM512_REDUCE_OP
7063
7064 #ifdef __AVX512VL__
7065
7066 #define _MM512_REDUCE_OP(op) \
7067 __m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
7068 __m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
7069 __m256h __T3 = __builtin_ia32_##op##ph256_mask (__T1, __T2, \
7070 _mm256_setzero_ph (), (__mmask16) -1); \
7071 __m128h __T4 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 0); \
7072 __m128h __T5 = (__m128h) _mm256_extractf128_pd ((__m256d) __T3, 1); \
7073 __m128h __T6 = __builtin_ia32_##op##ph128_mask \
7074 (__T4, __T5, _mm_setzero_ph (),(__mmask8) -1); \
7075 __m128h __T7 = (__m128h) __builtin_shuffle ((__m128h)__T6, \
7076 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
7077 __m128h __T8 = (__m128h) __builtin_ia32_##op##ph128_mask \
7078 (__T6, __T7, _mm_setzero_ph (),(__mmask8) -1); \
7079 __m128h __T9 = (__m128h) __builtin_shuffle ((__m128h)__T8, \
7080 (__v8hi) { 4, 5 }); \
7081 __m128h __T10 = __builtin_ia32_##op##ph128_mask \
7082 (__T8, __T9, _mm_setzero_ph (),(__mmask8) -1); \
7083 __m128h __T11 = (__m128h) __builtin_shuffle (__T10, \
7084 (__v8hi) { 1, 0 }); \
7085 __m128h __T12 = __builtin_ia32_##op##ph128_mask \
7086 (__T10, __T11, _mm_setzero_ph (),(__mmask8) -1); \
7087 return __T12[0]
7088
7089 #else
7090
7091 #define _MM512_REDUCE_OP(op) \
7092 __m512h __T1 = (__m512h) __builtin_shuffle ((__m512d) __A, \
7093 (__v8di) { 4, 5, 6, 7, 0, 0, 0, 0 }); \
7094 __m512h __T2 = _mm512_##op##_ph (__A, __T1); \
7095 __m512h __T3 = (__m512h) __builtin_shuffle ((__m512d) __T2, \
7096 (__v8di) { 2, 3, 0, 0, 0, 0, 0, 0 }); \
7097 __m512h __T4 = _mm512_##op##_ph (__T2, __T3); \
7098 __m512h __T5 = (__m512h) __builtin_shuffle ((__m512d) __T4, \
7099 (__v8di) { 1, 0, 0, 0, 0, 0, 0, 0 }); \
7100 __m512h __T6 = _mm512_##op##_ph (__T4, __T5); \
7101 __m512h __T7 = (__m512h) __builtin_shuffle ((__m512) __T6, \
7102 (__v16si) { 1, 0, 0, 0, 0, 0, 0, 0, \
7103 0, 0, 0, 0, 0, 0, 0, 0 }); \
7104 __m512h __T8 = _mm512_##op##_ph (__T6, __T7); \
7105 __m512h __T9 = (__m512h) __builtin_shuffle (__T8, \
7106 (__v32hi) { 1, 0, 0, 0, 0, 0, 0, 0, \
7107 0, 0, 0, 0, 0, 0, 0, 0, \
7108 0, 0, 0, 0, 0, 0, 0, 0, \
7109 0, 0, 0, 0, 0, 0, 0, 0 }); \
7110 __m512h __T10 = _mm512_##op##_ph (__T8, __T9); \
7111 return __T10[0]
7112 #endif
7113
7114 extern __inline _Float16
7115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_ph(__m512h __A)7116 _mm512_reduce_min_ph (__m512h __A)
7117 {
7118 _MM512_REDUCE_OP (min);
7119 }
7120
7121 extern __inline _Float16
7122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_ph(__m512h __A)7123 _mm512_reduce_max_ph (__m512h __A)
7124 {
7125 _MM512_REDUCE_OP (max);
7126 }
7127
7128 #undef _MM512_REDUCE_OP
7129
7130 extern __inline __m512h
7131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_ph(__mmask32 __U,__m512h __A,__m512h __W)7132 _mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W)
7133 {
7134 return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W,
7135 (__v32hi) __A,
7136 (__mmask32) __U);
7137
7138 }
7139
7140 extern __inline __m512h
7141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_ph(__m512h __A,__m512i __I,__m512h __B)7142 _mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B)
7143 {
7144 return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
7145 (__v32hi) __I,
7146 (__v32hi) __B,
7147 (__mmask32)-1);
7148 }
7149
7150 extern __inline __m512h
7151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_ph(__m512i __A,__m512h __B)7152 _mm512_permutexvar_ph (__m512i __A, __m512h __B)
7153 {
7154 return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
7155 (__v32hi) __A,
7156 (__v32hi)
7157 (_mm512_setzero_ph ()),
7158 (__mmask32)-1);
7159 }
7160
7161 extern __inline __m512h
7162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pch(_Float16 _Complex __A)7163 _mm512_set1_pch (_Float16 _Complex __A)
7164 {
7165 union
7166 {
7167 _Float16 _Complex __a;
7168 float __b;
7169 } __u = { .__a = __A};
7170
7171 return (__m512h) _mm512_set1_ps (__u.__b);
7172 }
7173
7174 // intrinsics below are alias for f*mul_*ch
7175 #define _mm512_mul_pch(A, B) _mm512_fmul_pch ((A), (B))
7176 #define _mm512_mask_mul_pch(W, U, A, B) \
7177 _mm512_mask_fmul_pch ((W), (U), (A), (B))
7178 #define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch ((U), (A), (B))
7179 #define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch ((A), (B), (R))
7180 #define _mm512_mask_mul_round_pch(W, U, A, B, R) \
7181 _mm512_mask_fmul_round_pch ((W), (U), (A), (B), (R))
7182 #define _mm512_maskz_mul_round_pch(U, A, B, R) \
7183 _mm512_maskz_fmul_round_pch ((U), (A), (B), (R))
7184
7185 #define _mm512_cmul_pch(A, B) _mm512_fcmul_pch ((A), (B))
7186 #define _mm512_mask_cmul_pch(W, U, A, B) \
7187 _mm512_mask_fcmul_pch ((W), (U), (A), (B))
7188 #define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch ((U), (A), (B))
7189 #define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch ((A), (B), (R))
7190 #define _mm512_mask_cmul_round_pch(W, U, A, B, R) \
7191 _mm512_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
7192 #define _mm512_maskz_cmul_round_pch(U, A, B, R) \
7193 _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
7194
7195 #define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
7196 #define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
7197 #define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
7198 #define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
7199 #define _mm_mask_mul_round_sch(W, U, A, B, R) \
7200 _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
7201 #define _mm_maskz_mul_round_sch(U, A, B, R) \
7202 _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
7203
7204 #define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
7205 #define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
7206 #define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
7207 #define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
7208 #define _mm_mask_cmul_round_sch(W, U, A, B, R) \
7209 _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
7210 #define _mm_maskz_cmul_round_sch(U, A, B, R) \
7211 _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
7212
7213 #ifdef __DISABLE_AVX512FP16__
7214 #undef __DISABLE_AVX512FP16__
7215 #pragma GCC pop_options
7216 #endif /* __DISABLE_AVX512FP16__ */
7217
7218 #endif /* __AVX512FP16INTRIN_H_INCLUDED */
7219