1 /* Copyright (C) 2013-2020 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _IMMINTRIN_H_INCLUDED 25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef _AVX512FINTRIN_H_INCLUDED 29 #define _AVX512FINTRIN_H_INCLUDED 30 31 #ifndef __AVX512F__ 32 #pragma GCC push_options 33 #pragma GCC target("avx512f") 34 #define __DISABLE_AVX512F__ 35 #endif /* __AVX512F__ */ 36 37 /* Internal data types for implementing the intrinsics. 
*/ 38 typedef double __v8df __attribute__ ((__vector_size__ (64))); 39 typedef float __v16sf __attribute__ ((__vector_size__ (64))); 40 typedef long long __v8di __attribute__ ((__vector_size__ (64))); 41 typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64))); 42 typedef int __v16si __attribute__ ((__vector_size__ (64))); 43 typedef unsigned int __v16su __attribute__ ((__vector_size__ (64))); 44 typedef short __v32hi __attribute__ ((__vector_size__ (64))); 45 typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64))); 46 typedef char __v64qi __attribute__ ((__vector_size__ (64))); 47 typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64))); 48 49 /* The Intel API is flexible enough that we must allow aliasing with other 50 vector types, and their scalar components. */ 51 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); 52 typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); 53 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); 54 55 /* Unaligned version of the same type. 
*/ 56 typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); 57 typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); 58 typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); 59 60 typedef unsigned char __mmask8; 61 typedef unsigned short __mmask16; 62 63 extern __inline __mmask16 64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 65 _mm512_int2mask (int __M) 66 { 67 return (__mmask16) __M; 68 } 69 70 extern __inline int 71 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 72 _mm512_mask2int (__mmask16 __M) 73 { 74 return (int) __M; 75 } 76 77 extern __inline __m512i 78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 79 _mm512_set_epi64 (long long __A, long long __B, long long __C, 80 long long __D, long long __E, long long __F, 81 long long __G, long long __H) 82 { 83 return __extension__ (__m512i) (__v8di) 84 { __H, __G, __F, __E, __D, __C, __B, __A }; 85 } 86 87 /* Create the vector [A B C D E F G H I J K L M N O P]. 
*/ 88 extern __inline __m512i 89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 90 _mm512_set_epi32 (int __A, int __B, int __C, int __D, 91 int __E, int __F, int __G, int __H, 92 int __I, int __J, int __K, int __L, 93 int __M, int __N, int __O, int __P) 94 { 95 return __extension__ (__m512i)(__v16si) 96 { __P, __O, __N, __M, __L, __K, __J, __I, 97 __H, __G, __F, __E, __D, __C, __B, __A }; 98 } 99 100 extern __inline __m512i 101 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 102 _mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28, 103 short __q27, short __q26, short __q25, short __q24, 104 short __q23, short __q22, short __q21, short __q20, 105 short __q19, short __q18, short __q17, short __q16, 106 short __q15, short __q14, short __q13, short __q12, 107 short __q11, short __q10, short __q09, short __q08, 108 short __q07, short __q06, short __q05, short __q04, 109 short __q03, short __q02, short __q01, short __q00) 110 { 111 return __extension__ (__m512i)(__v32hi){ 112 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, 113 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15, 114 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23, 115 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31 116 }; 117 } 118 119 extern __inline __m512i 120 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 121 _mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60, 122 char __q59, char __q58, char __q57, char __q56, 123 char __q55, char __q54, char __q53, char __q52, 124 char __q51, char __q50, char __q49, char __q48, 125 char __q47, char __q46, char __q45, char __q44, 126 char __q43, char __q42, char __q41, char __q40, 127 char __q39, char __q38, char __q37, char __q36, 128 char __q35, char __q34, char __q33, char __q32, 129 char __q31, char __q30, char __q29, char __q28, 130 char __q27, char __q26, char __q25, char __q24, 131 char __q23, char __q22, char __q21, char __q20, 132 char __q19, 
char __q18, char __q17, char __q16, 133 char __q15, char __q14, char __q13, char __q12, 134 char __q11, char __q10, char __q09, char __q08, 135 char __q07, char __q06, char __q05, char __q04, 136 char __q03, char __q02, char __q01, char __q00) 137 { 138 return __extension__ (__m512i)(__v64qi){ 139 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, 140 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15, 141 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23, 142 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31, 143 __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39, 144 __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47, 145 __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55, 146 __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63 147 }; 148 } 149 150 extern __inline __m512d 151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 152 _mm512_set_pd (double __A, double __B, double __C, double __D, 153 double __E, double __F, double __G, double __H) 154 { 155 return __extension__ (__m512d) 156 { __H, __G, __F, __E, __D, __C, __B, __A }; 157 } 158 159 extern __inline __m512 160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 161 _mm512_set_ps (float __A, float __B, float __C, float __D, 162 float __E, float __F, float __G, float __H, 163 float __I, float __J, float __K, float __L, 164 float __M, float __N, float __O, float __P) 165 { 166 return __extension__ (__m512) 167 { __P, __O, __N, __M, __L, __K, __J, __I, 168 __H, __G, __F, __E, __D, __C, __B, __A }; 169 } 170 171 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 172 _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0) 173 174 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 175 e8,e9,e10,e11,e12,e13,e14,e15) \ 176 _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 177 178 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 179 _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0) 180 181 #define 
_mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 182 _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 183 184 extern __inline __m512 185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 186 _mm512_undefined_ps (void) 187 { 188 __m512 __Y = __Y; 189 return __Y; 190 } 191 192 #define _mm512_undefined _mm512_undefined_ps 193 194 extern __inline __m512d 195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 196 _mm512_undefined_pd (void) 197 { 198 __m512d __Y = __Y; 199 return __Y; 200 } 201 202 extern __inline __m512i 203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 204 _mm512_undefined_epi32 (void) 205 { 206 __m512i __Y = __Y; 207 return __Y; 208 } 209 210 #define _mm512_undefined_si512 _mm512_undefined_epi32 211 212 extern __inline __m512i 213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 214 _mm512_set1_epi8 (char __A) 215 { 216 return __extension__ (__m512i)(__v64qi) 217 { __A, __A, __A, __A, __A, __A, __A, __A, 218 __A, __A, __A, __A, __A, __A, __A, __A, 219 __A, __A, __A, __A, __A, __A, __A, __A, 220 __A, __A, __A, __A, __A, __A, __A, __A, 221 __A, __A, __A, __A, __A, __A, __A, __A, 222 __A, __A, __A, __A, __A, __A, __A, __A, 223 __A, __A, __A, __A, __A, __A, __A, __A, 224 __A, __A, __A, __A, __A, __A, __A, __A }; 225 } 226 227 extern __inline __m512i 228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 229 _mm512_set1_epi16 (short __A) 230 { 231 return __extension__ (__m512i)(__v32hi) 232 { __A, __A, __A, __A, __A, __A, __A, __A, 233 __A, __A, __A, __A, __A, __A, __A, __A, 234 __A, __A, __A, __A, __A, __A, __A, __A, 235 __A, __A, __A, __A, __A, __A, __A, __A }; 236 } 237 238 extern __inline __m512d 239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 240 _mm512_set1_pd (double __A) 241 { 242 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__ 243 (__v2df) { __A, }, 244 (__v8df) 245 
_mm512_undefined_pd (), 246 (__mmask8) -1); 247 } 248 249 extern __inline __m512 250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 251 _mm512_set1_ps (float __A) 252 { 253 return (__m512) __builtin_ia32_broadcastss512 (__extension__ 254 (__v4sf) { __A, }, 255 (__v16sf) 256 _mm512_undefined_ps (), 257 (__mmask16) -1); 258 } 259 260 /* Create the vector [A B C D A B C D A B C D A B C D]. */ 261 extern __inline __m512i 262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 263 _mm512_set4_epi32 (int __A, int __B, int __C, int __D) 264 { 265 return __extension__ (__m512i)(__v16si) 266 { __D, __C, __B, __A, __D, __C, __B, __A, 267 __D, __C, __B, __A, __D, __C, __B, __A }; 268 } 269 270 extern __inline __m512i 271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 272 _mm512_set4_epi64 (long long __A, long long __B, long long __C, 273 long long __D) 274 { 275 return __extension__ (__m512i) (__v8di) 276 { __D, __C, __B, __A, __D, __C, __B, __A }; 277 } 278 279 extern __inline __m512d 280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 281 _mm512_set4_pd (double __A, double __B, double __C, double __D) 282 { 283 return __extension__ (__m512d) 284 { __D, __C, __B, __A, __D, __C, __B, __A }; 285 } 286 287 extern __inline __m512 288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 289 _mm512_set4_ps (float __A, float __B, float __C, float __D) 290 { 291 return __extension__ (__m512) 292 { __D, __C, __B, __A, __D, __C, __B, __A, 293 __D, __C, __B, __A, __D, __C, __B, __A }; 294 } 295 296 #define _mm512_setr4_epi64(e0,e1,e2,e3) \ 297 _mm512_set4_epi64(e3,e2,e1,e0) 298 299 #define _mm512_setr4_epi32(e0,e1,e2,e3) \ 300 _mm512_set4_epi32(e3,e2,e1,e0) 301 302 #define _mm512_setr4_pd(e0,e1,e2,e3) \ 303 _mm512_set4_pd(e3,e2,e1,e0) 304 305 #define _mm512_setr4_ps(e0,e1,e2,e3) \ 306 _mm512_set4_ps(e3,e2,e1,e0) 307 308 extern __inline __m512 309 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 310 _mm512_setzero_ps (void) 311 { 312 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 313 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 314 } 315 316 extern __inline __m512 317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 318 _mm512_setzero (void) 319 { 320 return _mm512_setzero_ps (); 321 } 322 323 extern __inline __m512d 324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 325 _mm512_setzero_pd (void) 326 { 327 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 328 } 329 330 extern __inline __m512i 331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 332 _mm512_setzero_epi32 (void) 333 { 334 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 335 } 336 337 extern __inline __m512i 338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 339 _mm512_setzero_si512 (void) 340 { 341 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 342 } 343 344 extern __inline __m512d 345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 346 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 347 { 348 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 349 (__v8df) __W, 350 (__mmask8) __U); 351 } 352 353 extern __inline __m512d 354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 355 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 356 { 357 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 358 (__v8df) 359 _mm512_setzero_pd (), 360 (__mmask8) __U); 361 } 362 363 extern __inline __m512 364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 365 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 366 { 367 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 368 (__v16sf) __W, 369 (__mmask16) __U); 370 } 371 372 extern __inline __m512 373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
374 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) 375 { 376 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 377 (__v16sf) 378 _mm512_setzero_ps (), 379 (__mmask16) __U); 380 } 381 382 extern __inline __m512d 383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 384 _mm512_load_pd (void const *__P) 385 { 386 return *(__m512d *) __P; 387 } 388 389 extern __inline __m512d 390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 391 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) 392 { 393 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 394 (__v8df) __W, 395 (__mmask8) __U); 396 } 397 398 extern __inline __m512d 399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 400 _mm512_maskz_load_pd (__mmask8 __U, void const *__P) 401 { 402 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 403 (__v8df) 404 _mm512_setzero_pd (), 405 (__mmask8) __U); 406 } 407 408 extern __inline void 409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 410 _mm512_store_pd (void *__P, __m512d __A) 411 { 412 *(__m512d *) __P = __A; 413 } 414 415 extern __inline void 416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 417 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A) 418 { 419 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A, 420 (__mmask8) __U); 421 } 422 423 extern __inline __m512 424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 425 _mm512_load_ps (void const *__P) 426 { 427 return *(__m512 *) __P; 428 } 429 430 extern __inline __m512 431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 432 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) 433 { 434 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 435 (__v16sf) __W, 436 (__mmask16) __U); 437 } 438 439 extern __inline __m512 440 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 441 _mm512_maskz_load_ps (__mmask16 __U, void const *__P) 442 { 443 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 444 (__v16sf) 445 _mm512_setzero_ps (), 446 (__mmask16) __U); 447 } 448 449 extern __inline void 450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 451 _mm512_store_ps (void *__P, __m512 __A) 452 { 453 *(__m512 *) __P = __A; 454 } 455 456 extern __inline void 457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 458 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A) 459 { 460 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A, 461 (__mmask16) __U); 462 } 463 464 extern __inline __m512i 465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 466 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 467 { 468 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 469 (__v8di) __W, 470 (__mmask8) __U); 471 } 472 473 extern __inline __m512i 474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 475 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) 476 { 477 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 478 (__v8di) 479 _mm512_setzero_si512 (), 480 (__mmask8) __U); 481 } 482 483 extern __inline __m512i 484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 485 _mm512_load_epi64 (void const *__P) 486 { 487 return *(__m512i *) __P; 488 } 489 490 extern __inline __m512i 491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 492 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) 493 { 494 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 495 (__v8di) __W, 496 (__mmask8) __U); 497 } 498 499 extern __inline __m512i 500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 501 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) 502 { 503 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const 
__v8di *) __P, 504 (__v8di) 505 _mm512_setzero_si512 (), 506 (__mmask8) __U); 507 } 508 509 extern __inline void 510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 511 _mm512_store_epi64 (void *__P, __m512i __A) 512 { 513 *(__m512i *) __P = __A; 514 } 515 516 extern __inline void 517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 518 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) 519 { 520 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, 521 (__mmask8) __U); 522 } 523 524 extern __inline __m512i 525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 526 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 527 { 528 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, 529 (__v16si) __W, 530 (__mmask16) __U); 531 } 532 533 extern __inline __m512i 534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 535 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) 536 { 537 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, 538 (__v16si) 539 _mm512_setzero_si512 (), 540 (__mmask16) __U); 541 } 542 543 extern __inline __m512i 544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 545 _mm512_load_si512 (void const *__P) 546 { 547 return *(__m512i *) __P; 548 } 549 550 extern __inline __m512i 551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 552 _mm512_load_epi32 (void const *__P) 553 { 554 return *(__m512i *) __P; 555 } 556 557 extern __inline __m512i 558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 559 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) 560 { 561 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 562 (__v16si) __W, 563 (__mmask16) __U); 564 } 565 566 extern __inline __m512i 567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 568 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) 569 { 
570 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 571 (__v16si) 572 _mm512_setzero_si512 (), 573 (__mmask16) __U); 574 } 575 576 extern __inline void 577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 578 _mm512_store_si512 (void *__P, __m512i __A) 579 { 580 *(__m512i *) __P = __A; 581 } 582 583 extern __inline void 584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 585 _mm512_store_epi32 (void *__P, __m512i __A) 586 { 587 *(__m512i *) __P = __A; 588 } 589 590 extern __inline void 591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 592 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) 593 { 594 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, 595 (__mmask16) __U); 596 } 597 598 extern __inline __m512i 599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 600 _mm512_mullo_epi32 (__m512i __A, __m512i __B) 601 { 602 return (__m512i) ((__v16su) __A * (__v16su) __B); 603 } 604 605 extern __inline __m512i 606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 607 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 608 { 609 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 610 (__v16si) __B, 611 (__v16si) 612 _mm512_setzero_si512 (), 613 __M); 614 } 615 616 extern __inline __m512i 617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 618 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 619 { 620 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 621 (__v16si) __B, 622 (__v16si) __W, __M); 623 } 624 625 extern __inline __m512i 626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 627 _mm512_mullox_epi64 (__m512i __A, __m512i __B) 628 { 629 return (__m512i) ((__v8du) __A * (__v8du) __B); 630 } 631 632 extern __inline __m512i 633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 634 
_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 635 { 636 return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B)); 637 } 638 639 extern __inline __m512i 640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 641 _mm512_sllv_epi32 (__m512i __X, __m512i __Y) 642 { 643 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 644 (__v16si) __Y, 645 (__v16si) 646 _mm512_undefined_epi32 (), 647 (__mmask16) -1); 648 } 649 650 extern __inline __m512i 651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 652 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 653 { 654 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 655 (__v16si) __Y, 656 (__v16si) __W, 657 (__mmask16) __U); 658 } 659 660 extern __inline __m512i 661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 662 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 663 { 664 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 665 (__v16si) __Y, 666 (__v16si) 667 _mm512_setzero_si512 (), 668 (__mmask16) __U); 669 } 670 671 extern __inline __m512i 672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 673 _mm512_srav_epi32 (__m512i __X, __m512i __Y) 674 { 675 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 676 (__v16si) __Y, 677 (__v16si) 678 _mm512_undefined_epi32 (), 679 (__mmask16) -1); 680 } 681 682 extern __inline __m512i 683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 684 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 685 { 686 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 687 (__v16si) __Y, 688 (__v16si) __W, 689 (__mmask16) __U); 690 } 691 692 extern __inline __m512i 693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 694 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 695 { 696 return (__m512i) 
__builtin_ia32_psrav16si_mask ((__v16si) __X, 697 (__v16si) __Y, 698 (__v16si) 699 _mm512_setzero_si512 (), 700 (__mmask16) __U); 701 } 702 703 extern __inline __m512i 704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 705 _mm512_srlv_epi32 (__m512i __X, __m512i __Y) 706 { 707 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 708 (__v16si) __Y, 709 (__v16si) 710 _mm512_undefined_epi32 (), 711 (__mmask16) -1); 712 } 713 714 extern __inline __m512i 715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 716 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 717 { 718 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 719 (__v16si) __Y, 720 (__v16si) __W, 721 (__mmask16) __U); 722 } 723 724 extern __inline __m512i 725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 726 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 727 { 728 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 729 (__v16si) __Y, 730 (__v16si) 731 _mm512_setzero_si512 (), 732 (__mmask16) __U); 733 } 734 735 extern __inline __m512i 736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 737 _mm512_add_epi64 (__m512i __A, __m512i __B) 738 { 739 return (__m512i) ((__v8du) __A + (__v8du) __B); 740 } 741 742 extern __inline __m512i 743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 744 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 745 { 746 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 747 (__v8di) __B, 748 (__v8di) __W, 749 (__mmask8) __U); 750 } 751 752 extern __inline __m512i 753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 754 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 755 { 756 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 757 (__v8di) __B, 758 (__v8di) 759 _mm512_setzero_si512 (), 760 (__mmask8) __U); 761 } 762 763 extern 
__inline __m512i 764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 765 _mm512_sub_epi64 (__m512i __A, __m512i __B) 766 { 767 return (__m512i) ((__v8du) __A - (__v8du) __B); 768 } 769 770 extern __inline __m512i 771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 772 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 773 { 774 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 775 (__v8di) __B, 776 (__v8di) __W, 777 (__mmask8) __U); 778 } 779 780 extern __inline __m512i 781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 782 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 783 { 784 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 785 (__v8di) __B, 786 (__v8di) 787 _mm512_setzero_si512 (), 788 (__mmask8) __U); 789 } 790 791 extern __inline __m512i 792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 793 _mm512_sllv_epi64 (__m512i __X, __m512i __Y) 794 { 795 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 796 (__v8di) __Y, 797 (__v8di) 798 _mm512_undefined_pd (), 799 (__mmask8) -1); 800 } 801 802 extern __inline __m512i 803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 804 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 805 { 806 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 807 (__v8di) __Y, 808 (__v8di) __W, 809 (__mmask8) __U); 810 } 811 812 extern __inline __m512i 813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 814 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 815 { 816 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 817 (__v8di) __Y, 818 (__v8di) 819 _mm512_setzero_si512 (), 820 (__mmask8) __U); 821 } 822 823 extern __inline __m512i 824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 825 _mm512_srav_epi64 (__m512i __X, __m512i __Y) 826 { 827 return (__m512i) 
__builtin_ia32_psrav8di_mask ((__v8di) __X, 828 (__v8di) __Y, 829 (__v8di) 830 _mm512_undefined_epi32 (), 831 (__mmask8) -1); 832 } 833 834 extern __inline __m512i 835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 836 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 837 { 838 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 839 (__v8di) __Y, 840 (__v8di) __W, 841 (__mmask8) __U); 842 } 843 844 extern __inline __m512i 845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 846 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 847 { 848 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 849 (__v8di) __Y, 850 (__v8di) 851 _mm512_setzero_si512 (), 852 (__mmask8) __U); 853 } 854 855 extern __inline __m512i 856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 857 _mm512_srlv_epi64 (__m512i __X, __m512i __Y) 858 { 859 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 860 (__v8di) __Y, 861 (__v8di) 862 _mm512_undefined_epi32 (), 863 (__mmask8) -1); 864 } 865 866 extern __inline __m512i 867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 868 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 869 { 870 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 871 (__v8di) __Y, 872 (__v8di) __W, 873 (__mmask8) __U); 874 } 875 876 extern __inline __m512i 877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 878 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 879 { 880 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 881 (__v8di) __Y, 882 (__v8di) 883 _mm512_setzero_si512 (), 884 (__mmask8) __U); 885 } 886 887 extern __inline __m512i 888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 889 _mm512_add_epi32 (__m512i __A, __m512i __B) 890 { 891 return (__m512i) ((__v16su) __A + (__v16su) __B); 892 } 893 894 extern __inline __m512i 895 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 896 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 897 { 898 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 899 (__v16si) __B, 900 (__v16si) __W, 901 (__mmask16) __U); 902 } 903 904 extern __inline __m512i 905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 906 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 907 { 908 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 909 (__v16si) __B, 910 (__v16si) 911 _mm512_setzero_si512 (), 912 (__mmask16) __U); 913 } 914 915 extern __inline __m512i 916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 917 _mm512_mul_epi32 (__m512i __X, __m512i __Y) 918 { 919 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 920 (__v16si) __Y, 921 (__v8di) 922 _mm512_undefined_epi32 (), 923 (__mmask8) -1); 924 } 925 926 extern __inline __m512i 927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 928 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 929 { 930 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 931 (__v16si) __Y, 932 (__v8di) __W, __M); 933 } 934 935 extern __inline __m512i 936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 937 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) 938 { 939 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 940 (__v16si) __Y, 941 (__v8di) 942 _mm512_setzero_si512 (), 943 __M); 944 } 945 946 extern __inline __m512i 947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 948 _mm512_sub_epi32 (__m512i __A, __m512i __B) 949 { 950 return (__m512i) ((__v16su) __A - (__v16su) __B); 951 } 952 953 extern __inline __m512i 954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 955 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 956 { 957 return (__m512i) 
__builtin_ia32_psubd512_mask ((__v16si) __A, 958 (__v16si) __B, 959 (__v16si) __W, 960 (__mmask16) __U); 961 } 962 963 extern __inline __m512i 964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 965 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 966 { 967 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 968 (__v16si) __B, 969 (__v16si) 970 _mm512_setzero_si512 (), 971 (__mmask16) __U); 972 } 973 974 extern __inline __m512i 975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 976 _mm512_mul_epu32 (__m512i __X, __m512i __Y) 977 { 978 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 979 (__v16si) __Y, 980 (__v8di) 981 _mm512_undefined_epi32 (), 982 (__mmask8) -1); 983 } 984 985 extern __inline __m512i 986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 987 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 988 { 989 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 990 (__v16si) __Y, 991 (__v8di) __W, __M); 992 } 993 994 extern __inline __m512i 995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 996 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) 997 { 998 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 999 (__v16si) __Y, 1000 (__v8di) 1001 _mm512_setzero_si512 (), 1002 __M); 1003 } 1004 1005 #ifdef __OPTIMIZE__ 1006 extern __inline __m512i 1007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1008 _mm512_slli_epi64 (__m512i __A, unsigned int __B) 1009 { 1010 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 1011 (__v8di) 1012 _mm512_undefined_epi32 (), 1013 (__mmask8) -1); 1014 } 1015 1016 extern __inline __m512i 1017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1018 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 1019 unsigned int __B) 1020 { 1021 return (__m512i) __builtin_ia32_psllqi512_mask 
((__v8di) __A, __B, 1022 (__v8di) __W, 1023 (__mmask8) __U); 1024 } 1025 1026 extern __inline __m512i 1027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1028 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 1029 { 1030 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 1031 (__v8di) 1032 _mm512_setzero_si512 (), 1033 (__mmask8) __U); 1034 } 1035 #else 1036 #define _mm512_slli_epi64(X, C) \ 1037 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1038 (__v8di)(__m512i)_mm512_undefined_epi32 (),\ 1039 (__mmask8)-1)) 1040 1041 #define _mm512_mask_slli_epi64(W, U, X, C) \ 1042 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1043 (__v8di)(__m512i)(W),\ 1044 (__mmask8)(U))) 1045 1046 #define _mm512_maskz_slli_epi64(U, X, C) \ 1047 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1048 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 1049 (__mmask8)(U))) 1050 #endif 1051 1052 extern __inline __m512i 1053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1054 _mm512_sll_epi64 (__m512i __A, __m128i __B) 1055 { 1056 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 1057 (__v2di) __B, 1058 (__v8di) 1059 _mm512_undefined_epi32 (), 1060 (__mmask8) -1); 1061 } 1062 1063 extern __inline __m512i 1064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1065 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 1066 { 1067 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 1068 (__v2di) __B, 1069 (__v8di) __W, 1070 (__mmask8) __U); 1071 } 1072 1073 extern __inline __m512i 1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1075 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 1076 { 1077 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 1078 (__v2di) __B, 1079 (__v8di) 1080 _mm512_setzero_si512 (), 1081 (__mmask8) __U); 1082 } 1083 
1084 #ifdef __OPTIMIZE__ 1085 extern __inline __m512i 1086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1087 _mm512_srli_epi64 (__m512i __A, unsigned int __B) 1088 { 1089 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, 1090 (__v8di) 1091 _mm512_undefined_epi32 (), 1092 (__mmask8) -1); 1093 } 1094 1095 extern __inline __m512i 1096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1097 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U, 1098 __m512i __A, unsigned int __B) 1099 { 1100 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, 1101 (__v8di) __W, 1102 (__mmask8) __U); 1103 } 1104 1105 extern __inline __m512i 1106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1107 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 1108 { 1109 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, 1110 (__v8di) 1111 _mm512_setzero_si512 (), 1112 (__mmask8) __U); 1113 } 1114 #else 1115 #define _mm512_srli_epi64(X, C) \ 1116 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1117 (__v8di)(__m512i)_mm512_undefined_epi32 (),\ 1118 (__mmask8)-1)) 1119 1120 #define _mm512_mask_srli_epi64(W, U, X, C) \ 1121 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1122 (__v8di)(__m512i)(W),\ 1123 (__mmask8)(U))) 1124 1125 #define _mm512_maskz_srli_epi64(U, X, C) \ 1126 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1127 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 1128 (__mmask8)(U))) 1129 #endif 1130 1131 extern __inline __m512i 1132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1133 _mm512_srl_epi64 (__m512i __A, __m128i __B) 1134 { 1135 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 1136 (__v2di) __B, 1137 (__v8di) 1138 _mm512_undefined_epi32 (), 1139 (__mmask8) -1); 1140 } 1141 1142 extern __inline __m512i 1143 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 1144 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 1145 { 1146 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 1147 (__v2di) __B, 1148 (__v8di) __W, 1149 (__mmask8) __U); 1150 } 1151 1152 extern __inline __m512i 1153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1154 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 1155 { 1156 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 1157 (__v2di) __B, 1158 (__v8di) 1159 _mm512_setzero_si512 (), 1160 (__mmask8) __U); 1161 } 1162 1163 #ifdef __OPTIMIZE__ 1164 extern __inline __m512i 1165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1166 _mm512_srai_epi64 (__m512i __A, unsigned int __B) 1167 { 1168 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, 1169 (__v8di) 1170 _mm512_undefined_epi32 (), 1171 (__mmask8) -1); 1172 } 1173 1174 extern __inline __m512i 1175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1176 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 1177 unsigned int __B) 1178 { 1179 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, 1180 (__v8di) __W, 1181 (__mmask8) __U); 1182 } 1183 1184 extern __inline __m512i 1185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1186 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 1187 { 1188 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, 1189 (__v8di) 1190 _mm512_setzero_si512 (), 1191 (__mmask8) __U); 1192 } 1193 #else 1194 #define _mm512_srai_epi64(X, C) \ 1195 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1196 (__v8di)(__m512i)_mm512_undefined_epi32 (),\ 1197 (__mmask8)-1)) 1198 1199 #define _mm512_mask_srai_epi64(W, U, X, C) \ 1200 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1201 (__v8di)(__m512i)(W),\ 1202 (__mmask8)(U))) 1203 1204 #define 
_mm512_maskz_srai_epi64(U, X, C) \ 1205 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1206 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 1207 (__mmask8)(U))) 1208 #endif 1209 1210 extern __inline __m512i 1211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1212 _mm512_sra_epi64 (__m512i __A, __m128i __B) 1213 { 1214 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 1215 (__v2di) __B, 1216 (__v8di) 1217 _mm512_undefined_epi32 (), 1218 (__mmask8) -1); 1219 } 1220 1221 extern __inline __m512i 1222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1223 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 1224 { 1225 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 1226 (__v2di) __B, 1227 (__v8di) __W, 1228 (__mmask8) __U); 1229 } 1230 1231 extern __inline __m512i 1232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1233 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 1234 { 1235 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 1236 (__v2di) __B, 1237 (__v8di) 1238 _mm512_setzero_si512 (), 1239 (__mmask8) __U); 1240 } 1241 1242 #ifdef __OPTIMIZE__ 1243 extern __inline __m512i 1244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1245 _mm512_slli_epi32 (__m512i __A, unsigned int __B) 1246 { 1247 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, 1248 (__v16si) 1249 _mm512_undefined_epi32 (), 1250 (__mmask16) -1); 1251 } 1252 1253 extern __inline __m512i 1254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1255 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 1256 unsigned int __B) 1257 { 1258 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, 1259 (__v16si) __W, 1260 (__mmask16) __U); 1261 } 1262 1263 extern __inline __m512i 1264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1265 _mm512_maskz_slli_epi32 
(__mmask16 __U, __m512i __A, unsigned int __B) 1266 { 1267 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, 1268 (__v16si) 1269 _mm512_setzero_si512 (), 1270 (__mmask16) __U); 1271 } 1272 #else 1273 #define _mm512_slli_epi32(X, C) \ 1274 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1275 (__v16si)(__m512i)_mm512_undefined_epi32 (),\ 1276 (__mmask16)-1)) 1277 1278 #define _mm512_mask_slli_epi32(W, U, X, C) \ 1279 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1280 (__v16si)(__m512i)(W),\ 1281 (__mmask16)(U))) 1282 1283 #define _mm512_maskz_slli_epi32(U, X, C) \ 1284 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1285 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 1286 (__mmask16)(U))) 1287 #endif 1288 1289 extern __inline __m512i 1290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1291 _mm512_sll_epi32 (__m512i __A, __m128i __B) 1292 { 1293 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 1294 (__v4si) __B, 1295 (__v16si) 1296 _mm512_undefined_epi32 (), 1297 (__mmask16) -1); 1298 } 1299 1300 extern __inline __m512i 1301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1302 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 1303 { 1304 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 1305 (__v4si) __B, 1306 (__v16si) __W, 1307 (__mmask16) __U); 1308 } 1309 1310 extern __inline __m512i 1311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1312 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 1313 { 1314 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 1315 (__v4si) __B, 1316 (__v16si) 1317 _mm512_setzero_si512 (), 1318 (__mmask16) __U); 1319 } 1320 1321 #ifdef __OPTIMIZE__ 1322 extern __inline __m512i 1323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1324 _mm512_srli_epi32 (__m512i __A, unsigned int __B) 
1325 { 1326 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, 1327 (__v16si) 1328 _mm512_undefined_epi32 (), 1329 (__mmask16) -1); 1330 } 1331 1332 extern __inline __m512i 1333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1334 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U, 1335 __m512i __A, unsigned int __B) 1336 { 1337 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, 1338 (__v16si) __W, 1339 (__mmask16) __U); 1340 } 1341 1342 extern __inline __m512i 1343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1344 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) 1345 { 1346 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, 1347 (__v16si) 1348 _mm512_setzero_si512 (), 1349 (__mmask16) __U); 1350 } 1351 #else 1352 #define _mm512_srli_epi32(X, C) \ 1353 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1354 (__v16si)(__m512i)_mm512_undefined_epi32 (),\ 1355 (__mmask16)-1)) 1356 1357 #define _mm512_mask_srli_epi32(W, U, X, C) \ 1358 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1359 (__v16si)(__m512i)(W),\ 1360 (__mmask16)(U))) 1361 1362 #define _mm512_maskz_srli_epi32(U, X, C) \ 1363 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1364 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 1365 (__mmask16)(U))) 1366 #endif 1367 1368 extern __inline __m512i 1369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1370 _mm512_srl_epi32 (__m512i __A, __m128i __B) 1371 { 1372 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 1373 (__v4si) __B, 1374 (__v16si) 1375 _mm512_undefined_epi32 (), 1376 (__mmask16) -1); 1377 } 1378 1379 extern __inline __m512i 1380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1381 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 1382 { 1383 return (__m512i) 
__builtin_ia32_psrld512_mask ((__v16si) __A, 1384 (__v4si) __B, 1385 (__v16si) __W, 1386 (__mmask16) __U); 1387 } 1388 1389 extern __inline __m512i 1390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1391 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 1392 { 1393 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 1394 (__v4si) __B, 1395 (__v16si) 1396 _mm512_setzero_si512 (), 1397 (__mmask16) __U); 1398 } 1399 1400 #ifdef __OPTIMIZE__ 1401 extern __inline __m512i 1402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1403 _mm512_srai_epi32 (__m512i __A, unsigned int __B) 1404 { 1405 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, 1406 (__v16si) 1407 _mm512_undefined_epi32 (), 1408 (__mmask16) -1); 1409 } 1410 1411 extern __inline __m512i 1412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1413 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 1414 unsigned int __B) 1415 { 1416 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, 1417 (__v16si) __W, 1418 (__mmask16) __U); 1419 } 1420 1421 extern __inline __m512i 1422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1423 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) 1424 { 1425 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, 1426 (__v16si) 1427 _mm512_setzero_si512 (), 1428 (__mmask16) __U); 1429 } 1430 #else 1431 #define _mm512_srai_epi32(X, C) \ 1432 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1433 (__v16si)(__m512i)_mm512_undefined_epi32 (),\ 1434 (__mmask16)-1)) 1435 1436 #define _mm512_mask_srai_epi32(W, U, X, C) \ 1437 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1438 (__v16si)(__m512i)(W),\ 1439 (__mmask16)(U))) 1440 1441 #define _mm512_maskz_srai_epi32(U, X, C) \ 1442 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1443 
(__v16si)(__m512i)_mm512_setzero_si512 (),\ 1444 (__mmask16)(U))) 1445 #endif 1446 1447 extern __inline __m512i 1448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1449 _mm512_sra_epi32 (__m512i __A, __m128i __B) 1450 { 1451 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 1452 (__v4si) __B, 1453 (__v16si) 1454 _mm512_undefined_epi32 (), 1455 (__mmask16) -1); 1456 } 1457 1458 extern __inline __m512i 1459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1460 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 1461 { 1462 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 1463 (__v4si) __B, 1464 (__v16si) __W, 1465 (__mmask16) __U); 1466 } 1467 1468 extern __inline __m512i 1469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1470 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 1471 { 1472 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 1473 (__v4si) __B, 1474 (__v16si) 1475 _mm512_setzero_si512 (), 1476 (__mmask16) __U); 1477 } 1478 1479 #ifdef __OPTIMIZE__ 1480 extern __inline __m128d 1481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1482 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R) 1483 { 1484 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, 1485 (__v2df) __B, 1486 __R); 1487 } 1488 1489 extern __inline __m128d 1490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1491 _mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 1492 __m128d __B, const int __R) 1493 { 1494 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, 1495 (__v2df) __B, 1496 (__v2df) __W, 1497 (__mmask8) __U, __R); 1498 } 1499 1500 extern __inline __m128d 1501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1502 _mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 1503 const int __R) 1504 { 1505 return (__m128d) __builtin_ia32_addsd_mask_round 
((__v2df) __A, 1506 (__v2df) __B, 1507 (__v2df) 1508 _mm_setzero_pd (), 1509 (__mmask8) __U, __R); 1510 } 1511 1512 extern __inline __m128 1513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1514 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R) 1515 { 1516 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, 1517 (__v4sf) __B, 1518 __R); 1519 } 1520 1521 extern __inline __m128 1522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1523 _mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 1524 __m128 __B, const int __R) 1525 { 1526 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, 1527 (__v4sf) __B, 1528 (__v4sf) __W, 1529 (__mmask8) __U, __R); 1530 } 1531 1532 extern __inline __m128 1533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1534 _mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 1535 const int __R) 1536 { 1537 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, 1538 (__v4sf) __B, 1539 (__v4sf) 1540 _mm_setzero_ps (), 1541 (__mmask8) __U, __R); 1542 } 1543 1544 extern __inline __m128d 1545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1546 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R) 1547 { 1548 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, 1549 (__v2df) __B, 1550 __R); 1551 } 1552 1553 extern __inline __m128d 1554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1555 _mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 1556 __m128d __B, const int __R) 1557 { 1558 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, 1559 (__v2df) __B, 1560 (__v2df) __W, 1561 (__mmask8) __U, __R); 1562 } 1563 1564 extern __inline __m128d 1565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1566 _mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 1567 const int __R) 1568 { 1569 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, 
1570 (__v2df) __B, 1571 (__v2df) 1572 _mm_setzero_pd (), 1573 (__mmask8) __U, __R); 1574 } 1575 1576 extern __inline __m128 1577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1578 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R) 1579 { 1580 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, 1581 (__v4sf) __B, 1582 __R); 1583 } 1584 1585 extern __inline __m128 1586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1587 _mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 1588 __m128 __B, const int __R) 1589 { 1590 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, 1591 (__v4sf) __B, 1592 (__v4sf) __W, 1593 (__mmask8) __U, __R); 1594 } 1595 1596 extern __inline __m128 1597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1598 _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 1599 const int __R) 1600 { 1601 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, 1602 (__v4sf) __B, 1603 (__v4sf) 1604 _mm_setzero_ps (), 1605 (__mmask8) __U, __R); 1606 } 1607 1608 #else 1609 #define _mm_add_round_sd(A, B, C) \ 1610 (__m128d)__builtin_ia32_addsd_round(A, B, C) 1611 1612 #define _mm_mask_add_round_sd(W, U, A, B, C) \ 1613 (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C) 1614 1615 #define _mm_maskz_add_round_sd(U, A, B, C) \ 1616 (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 1617 1618 #define _mm_add_round_ss(A, B, C) \ 1619 (__m128)__builtin_ia32_addss_round(A, B, C) 1620 1621 #define _mm_mask_add_round_ss(W, U, A, B, C) \ 1622 (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C) 1623 1624 #define _mm_maskz_add_round_ss(U, A, B, C) \ 1625 (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 1626 1627 #define _mm_sub_round_sd(A, B, C) \ 1628 (__m128d)__builtin_ia32_subsd_round(A, B, C) 1629 1630 #define _mm_mask_sub_round_sd(W, U, A, B, C) \ 1631 (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, 
U, C) 1632 1633 #define _mm_maskz_sub_round_sd(U, A, B, C) \ 1634 (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 1635 1636 #define _mm_sub_round_ss(A, B, C) \ 1637 (__m128)__builtin_ia32_subss_round(A, B, C) 1638 1639 #define _mm_mask_sub_round_ss(W, U, A, B, C) \ 1640 (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C) 1641 1642 #define _mm_maskz_sub_round_ss(U, A, B, C) \ 1643 (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 1644 1645 #endif 1646 1647 #ifdef __OPTIMIZE__ 1648 extern __inline __m512i 1649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1650 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, 1651 const int __imm) 1652 { 1653 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, 1654 (__v8di) __B, 1655 (__v8di) __C, __imm, 1656 (__mmask8) -1); 1657 } 1658 1659 extern __inline __m512i 1660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1661 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B, 1662 __m512i __C, const int __imm) 1663 { 1664 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, 1665 (__v8di) __B, 1666 (__v8di) __C, __imm, 1667 (__mmask8) __U); 1668 } 1669 1670 extern __inline __m512i 1671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1672 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B, 1673 __m512i __C, const int __imm) 1674 { 1675 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A, 1676 (__v8di) __B, 1677 (__v8di) __C, 1678 __imm, (__mmask8) __U); 1679 } 1680 1681 extern __inline __m512i 1682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1683 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, 1684 const int __imm) 1685 { 1686 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, 1687 (__v16si) __B, 1688 (__v16si) __C, 1689 __imm, (__mmask16) -1); 1690 } 1691 1692 extern 
__inline __m512i 1693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1694 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B, 1695 __m512i __C, const int __imm) 1696 { 1697 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, 1698 (__v16si) __B, 1699 (__v16si) __C, 1700 __imm, (__mmask16) __U); 1701 } 1702 1703 extern __inline __m512i 1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1705 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B, 1706 __m512i __C, const int __imm) 1707 { 1708 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A, 1709 (__v16si) __B, 1710 (__v16si) __C, 1711 __imm, (__mmask16) __U); 1712 } 1713 #else 1714 #define _mm512_ternarylogic_epi64(A, B, C, I) \ 1715 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ 1716 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1)) 1717 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \ 1718 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ 1719 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) 1720 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \ 1721 ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \ 1722 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) 1723 #define _mm512_ternarylogic_epi32(A, B, C, I) \ 1724 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ 1725 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ 1726 (__mmask16)-1)) 1727 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \ 1728 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ 1729 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ 1730 (__mmask16)(U))) 1731 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \ 1732 ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \ 1733 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), 
(int)(I), \ 1734 (__mmask16)(U))) 1735 #endif 1736 1737 extern __inline __m512d 1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1739 _mm512_rcp14_pd (__m512d __A) 1740 { 1741 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1742 (__v8df) 1743 _mm512_undefined_pd (), 1744 (__mmask8) -1); 1745 } 1746 1747 extern __inline __m512d 1748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1749 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1750 { 1751 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1752 (__v8df) __W, 1753 (__mmask8) __U); 1754 } 1755 1756 extern __inline __m512d 1757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1758 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) 1759 { 1760 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1761 (__v8df) 1762 _mm512_setzero_pd (), 1763 (__mmask8) __U); 1764 } 1765 1766 extern __inline __m512 1767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1768 _mm512_rcp14_ps (__m512 __A) 1769 { 1770 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1771 (__v16sf) 1772 _mm512_undefined_ps (), 1773 (__mmask16) -1); 1774 } 1775 1776 extern __inline __m512 1777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1778 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1779 { 1780 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1781 (__v16sf) __W, 1782 (__mmask16) __U); 1783 } 1784 1785 extern __inline __m512 1786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1787 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) 1788 { 1789 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1790 (__v16sf) 1791 _mm512_setzero_ps (), 1792 (__mmask16) __U); 1793 } 1794 1795 extern __inline __m128d 1796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1797 _mm_rcp14_sd (__m128d __A, __m128d __B) 1798 { 1799 
return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B, 1800 (__v2df) __A); 1801 } 1802 1803 extern __inline __m128d 1804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1805 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1806 { 1807 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B, 1808 (__v2df) __A, 1809 (__v2df) __W, 1810 (__mmask8) __U); 1811 } 1812 1813 extern __inline __m128d 1814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1815 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1816 { 1817 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B, 1818 (__v2df) __A, 1819 (__v2df) _mm_setzero_ps (), 1820 (__mmask8) __U); 1821 } 1822 1823 extern __inline __m128 1824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1825 _mm_rcp14_ss (__m128 __A, __m128 __B) 1826 { 1827 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B, 1828 (__v4sf) __A); 1829 } 1830 1831 extern __inline __m128 1832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1833 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1834 { 1835 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B, 1836 (__v4sf) __A, 1837 (__v4sf) __W, 1838 (__mmask8) __U); 1839 } 1840 1841 extern __inline __m128 1842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1843 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1844 { 1845 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B, 1846 (__v4sf) __A, 1847 (__v4sf) _mm_setzero_ps (), 1848 (__mmask8) __U); 1849 } 1850 1851 extern __inline __m512d 1852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1853 _mm512_rsqrt14_pd (__m512d __A) 1854 { 1855 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1856 (__v8df) 1857 _mm512_undefined_pd (), 1858 (__mmask8) -1); 1859 } 1860 1861 extern __inline __m512d 1862 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 1863 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1864 { 1865 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1866 (__v8df) __W, 1867 (__mmask8) __U); 1868 } 1869 1870 extern __inline __m512d 1871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1872 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) 1873 { 1874 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1875 (__v8df) 1876 _mm512_setzero_pd (), 1877 (__mmask8) __U); 1878 } 1879 1880 extern __inline __m512 1881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1882 _mm512_rsqrt14_ps (__m512 __A) 1883 { 1884 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1885 (__v16sf) 1886 _mm512_undefined_ps (), 1887 (__mmask16) -1); 1888 } 1889 1890 extern __inline __m512 1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1892 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1893 { 1894 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1895 (__v16sf) __W, 1896 (__mmask16) __U); 1897 } 1898 1899 extern __inline __m512 1900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1901 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) 1902 { 1903 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1904 (__v16sf) 1905 _mm512_setzero_ps (), 1906 (__mmask16) __U); 1907 } 1908 1909 extern __inline __m128d 1910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1911 _mm_rsqrt14_sd (__m128d __A, __m128d __B) 1912 { 1913 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B, 1914 (__v2df) __A); 1915 } 1916 1917 extern __inline __m128d 1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1919 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1920 { 1921 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, 1922 (__v2df) __A, 1923 (__v2df) __W, 1924 (__mmask8) __U); 
1925 } 1926 1927 extern __inline __m128d 1928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1929 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1930 { 1931 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, 1932 (__v2df) __A, 1933 (__v2df) _mm_setzero_pd (), 1934 (__mmask8) __U); 1935 } 1936 1937 extern __inline __m128 1938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1939 _mm_rsqrt14_ss (__m128 __A, __m128 __B) 1940 { 1941 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B, 1942 (__v4sf) __A); 1943 } 1944 1945 extern __inline __m128 1946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1947 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1948 { 1949 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, 1950 (__v4sf) __A, 1951 (__v4sf) __W, 1952 (__mmask8) __U); 1953 } 1954 1955 extern __inline __m128 1956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1957 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1958 { 1959 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, 1960 (__v4sf) __A, 1961 (__v4sf) _mm_setzero_ps (), 1962 (__mmask8) __U); 1963 } 1964 1965 #ifdef __OPTIMIZE__ 1966 extern __inline __m512d 1967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1968 _mm512_sqrt_round_pd (__m512d __A, const int __R) 1969 { 1970 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1971 (__v8df) 1972 _mm512_undefined_pd (), 1973 (__mmask8) -1, __R); 1974 } 1975 1976 extern __inline __m512d 1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1978 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 1979 const int __R) 1980 { 1981 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1982 (__v8df) __W, 1983 (__mmask8) __U, __R); 1984 } 1985 1986 extern __inline __m512d 1987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
1988 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R) 1989 { 1990 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1991 (__v8df) 1992 _mm512_setzero_pd (), 1993 (__mmask8) __U, __R); 1994 } 1995 1996 extern __inline __m512 1997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1998 _mm512_sqrt_round_ps (__m512 __A, const int __R) 1999 { 2000 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 2001 (__v16sf) 2002 _mm512_undefined_ps (), 2003 (__mmask16) -1, __R); 2004 } 2005 2006 extern __inline __m512 2007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2008 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R) 2009 { 2010 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 2011 (__v16sf) __W, 2012 (__mmask16) __U, __R); 2013 } 2014 2015 extern __inline __m512 2016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2017 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R) 2018 { 2019 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 2020 (__v16sf) 2021 _mm512_setzero_ps (), 2022 (__mmask16) __U, __R); 2023 } 2024 2025 extern __inline __m128d 2026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2027 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R) 2028 { 2029 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, 2030 (__v2df) __A, 2031 (__v2df) 2032 _mm_setzero_pd (), 2033 (__mmask8) -1, __R); 2034 } 2035 2036 extern __inline __m128d 2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2038 _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 2039 const int __R) 2040 { 2041 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, 2042 (__v2df) __A, 2043 (__v2df) __W, 2044 (__mmask8) __U, __R); 2045 } 2046 2047 extern __inline __m128d 2048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2049 
_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R) 2050 { 2051 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, 2052 (__v2df) __A, 2053 (__v2df) 2054 _mm_setzero_pd (), 2055 (__mmask8) __U, __R); 2056 } 2057 2058 extern __inline __m128 2059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2060 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R) 2061 { 2062 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, 2063 (__v4sf) __A, 2064 (__v4sf) 2065 _mm_setzero_ps (), 2066 (__mmask8) -1, __R); 2067 } 2068 2069 extern __inline __m128 2070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2071 _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 2072 const int __R) 2073 { 2074 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, 2075 (__v4sf) __A, 2076 (__v4sf) __W, 2077 (__mmask8) __U, __R); 2078 } 2079 2080 extern __inline __m128 2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2082 _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) 2083 { 2084 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, 2085 (__v4sf) __A, 2086 (__v4sf) 2087 _mm_setzero_ps (), 2088 (__mmask8) __U, __R); 2089 } 2090 #else 2091 #define _mm512_sqrt_round_pd(A, C) \ 2092 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C) 2093 2094 #define _mm512_mask_sqrt_round_pd(W, U, A, C) \ 2095 (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C) 2096 2097 #define _mm512_maskz_sqrt_round_pd(U, A, C) \ 2098 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C) 2099 2100 #define _mm512_sqrt_round_ps(A, C) \ 2101 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C) 2102 2103 #define _mm512_mask_sqrt_round_ps(W, U, A, C) \ 2104 (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C) 2105 2106 #define _mm512_maskz_sqrt_round_ps(U, A, C) \ 2107 
(__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) 2108 2109 #define _mm_sqrt_round_sd(A, B, C) \ 2110 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \ 2111 (__v2df) _mm_setzero_pd (), -1, C) 2112 2113 #define _mm_mask_sqrt_round_sd(W, U, A, B, C) \ 2114 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C) 2115 2116 #define _mm_maskz_sqrt_round_sd(U, A, B, C) \ 2117 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \ 2118 (__v2df) _mm_setzero_pd (), U, C) 2119 2120 #define _mm_sqrt_round_ss(A, B, C) \ 2121 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \ 2122 (__v4sf) _mm_setzero_ps (), -1, C) 2123 2124 #define _mm_mask_sqrt_round_ss(W, U, A, B, C) \ 2125 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C) 2126 2127 #define _mm_maskz_sqrt_round_ss(U, A, B, C) \ 2128 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \ 2129 (__v4sf) _mm_setzero_ps (), U, C) 2130 #endif 2131 2132 extern __inline __m512i 2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2134 _mm512_cvtepi8_epi32 (__m128i __A) 2135 { 2136 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 2137 (__v16si) 2138 _mm512_undefined_epi32 (), 2139 (__mmask16) -1); 2140 } 2141 2142 extern __inline __m512i 2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2144 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) 2145 { 2146 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 2147 (__v16si) __W, 2148 (__mmask16) __U); 2149 } 2150 2151 extern __inline __m512i 2152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2153 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A) 2154 { 2155 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 2156 (__v16si) 2157 _mm512_setzero_si512 (), 2158 (__mmask16) __U); 2159 } 2160 2161 extern __inline __m512i 2162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2163 _mm512_cvtepi8_epi64 (__m128i __A) 2164 { 
2165 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 2166 (__v8di) 2167 _mm512_undefined_epi32 (), 2168 (__mmask8) -1); 2169 } 2170 2171 extern __inline __m512i 2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2173 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 2174 { 2175 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 2176 (__v8di) __W, 2177 (__mmask8) __U); 2178 } 2179 2180 extern __inline __m512i 2181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2182 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) 2183 { 2184 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 2185 (__v8di) 2186 _mm512_setzero_si512 (), 2187 (__mmask8) __U); 2188 } 2189 2190 extern __inline __m512i 2191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2192 _mm512_cvtepi16_epi32 (__m256i __A) 2193 { 2194 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 2195 (__v16si) 2196 _mm512_undefined_epi32 (), 2197 (__mmask16) -1); 2198 } 2199 2200 extern __inline __m512i 2201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2202 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) 2203 { 2204 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 2205 (__v16si) __W, 2206 (__mmask16) __U); 2207 } 2208 2209 extern __inline __m512i 2210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2211 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A) 2212 { 2213 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 2214 (__v16si) 2215 _mm512_setzero_si512 (), 2216 (__mmask16) __U); 2217 } 2218 2219 extern __inline __m512i 2220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2221 _mm512_cvtepi16_epi64 (__m128i __A) 2222 { 2223 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 2224 (__v8di) 2225 _mm512_undefined_epi32 (), 2226 (__mmask8) -1); 2227 } 2228 
2229 extern __inline __m512i 2230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2231 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 2232 { 2233 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 2234 (__v8di) __W, 2235 (__mmask8) __U); 2236 } 2237 2238 extern __inline __m512i 2239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2240 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) 2241 { 2242 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 2243 (__v8di) 2244 _mm512_setzero_si512 (), 2245 (__mmask8) __U); 2246 } 2247 2248 extern __inline __m512i 2249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2250 _mm512_cvtepi32_epi64 (__m256i __X) 2251 { 2252 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 2253 (__v8di) 2254 _mm512_undefined_epi32 (), 2255 (__mmask8) -1); 2256 } 2257 2258 extern __inline __m512i 2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2260 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) 2261 { 2262 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 2263 (__v8di) __W, 2264 (__mmask8) __U); 2265 } 2266 2267 extern __inline __m512i 2268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2269 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X) 2270 { 2271 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 2272 (__v8di) 2273 _mm512_setzero_si512 (), 2274 (__mmask8) __U); 2275 } 2276 2277 extern __inline __m512i 2278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2279 _mm512_cvtepu8_epi32 (__m128i __A) 2280 { 2281 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 2282 (__v16si) 2283 _mm512_undefined_epi32 (), 2284 (__mmask16) -1); 2285 } 2286 2287 extern __inline __m512i 2288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2289 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 
__U, __m128i __A) 2290 { 2291 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 2292 (__v16si) __W, 2293 (__mmask16) __U); 2294 } 2295 2296 extern __inline __m512i 2297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2298 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A) 2299 { 2300 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 2301 (__v16si) 2302 _mm512_setzero_si512 (), 2303 (__mmask16) __U); 2304 } 2305 2306 extern __inline __m512i 2307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2308 _mm512_cvtepu8_epi64 (__m128i __A) 2309 { 2310 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 2311 (__v8di) 2312 _mm512_undefined_epi32 (), 2313 (__mmask8) -1); 2314 } 2315 2316 extern __inline __m512i 2317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2318 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 2319 { 2320 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 2321 (__v8di) __W, 2322 (__mmask8) __U); 2323 } 2324 2325 extern __inline __m512i 2326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2327 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 2328 { 2329 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 2330 (__v8di) 2331 _mm512_setzero_si512 (), 2332 (__mmask8) __U); 2333 } 2334 2335 extern __inline __m512i 2336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2337 _mm512_cvtepu16_epi32 (__m256i __A) 2338 { 2339 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 2340 (__v16si) 2341 _mm512_undefined_epi32 (), 2342 (__mmask16) -1); 2343 } 2344 2345 extern __inline __m512i 2346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2347 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) 2348 { 2349 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 2350 (__v16si) __W, 2351 (__mmask16) __U); 2352 } 2353 
2354 extern __inline __m512i 2355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2356 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A) 2357 { 2358 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 2359 (__v16si) 2360 _mm512_setzero_si512 (), 2361 (__mmask16) __U); 2362 } 2363 2364 extern __inline __m512i 2365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2366 _mm512_cvtepu16_epi64 (__m128i __A) 2367 { 2368 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 2369 (__v8di) 2370 _mm512_undefined_epi32 (), 2371 (__mmask8) -1); 2372 } 2373 2374 extern __inline __m512i 2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2376 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 2377 { 2378 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 2379 (__v8di) __W, 2380 (__mmask8) __U); 2381 } 2382 2383 extern __inline __m512i 2384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2385 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) 2386 { 2387 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 2388 (__v8di) 2389 _mm512_setzero_si512 (), 2390 (__mmask8) __U); 2391 } 2392 2393 extern __inline __m512i 2394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2395 _mm512_cvtepu32_epi64 (__m256i __X) 2396 { 2397 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 2398 (__v8di) 2399 _mm512_undefined_epi32 (), 2400 (__mmask8) -1); 2401 } 2402 2403 extern __inline __m512i 2404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2405 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) 2406 { 2407 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 2408 (__v8di) __W, 2409 (__mmask8) __U); 2410 } 2411 2412 extern __inline __m512i 2413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2414 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i 
__X) 2415 { 2416 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 2417 (__v8di) 2418 _mm512_setzero_si512 (), 2419 (__mmask8) __U); 2420 } 2421 2422 #ifdef __OPTIMIZE__ 2423 extern __inline __m512d 2424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2425 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R) 2426 { 2427 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2428 (__v8df) __B, 2429 (__v8df) 2430 _mm512_undefined_pd (), 2431 (__mmask8) -1, __R); 2432 } 2433 2434 extern __inline __m512d 2435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2436 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2437 __m512d __B, const int __R) 2438 { 2439 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2440 (__v8df) __B, 2441 (__v8df) __W, 2442 (__mmask8) __U, __R); 2443 } 2444 2445 extern __inline __m512d 2446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2447 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2448 const int __R) 2449 { 2450 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2451 (__v8df) __B, 2452 (__v8df) 2453 _mm512_setzero_pd (), 2454 (__mmask8) __U, __R); 2455 } 2456 2457 extern __inline __m512 2458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2459 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R) 2460 { 2461 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2462 (__v16sf) __B, 2463 (__v16sf) 2464 _mm512_undefined_ps (), 2465 (__mmask16) -1, __R); 2466 } 2467 2468 extern __inline __m512 2469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2470 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2471 __m512 __B, const int __R) 2472 { 2473 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2474 (__v16sf) __B, 2475 (__v16sf) __W, 2476 (__mmask16) __U, __R); 2477 } 2478 2479 extern __inline __m512 2480 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 2481 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2482 { 2483 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2484 (__v16sf) __B, 2485 (__v16sf) 2486 _mm512_setzero_ps (), 2487 (__mmask16) __U, __R); 2488 } 2489 2490 extern __inline __m512d 2491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2492 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R) 2493 { 2494 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2495 (__v8df) __B, 2496 (__v8df) 2497 _mm512_undefined_pd (), 2498 (__mmask8) -1, __R); 2499 } 2500 2501 extern __inline __m512d 2502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2503 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2504 __m512d __B, const int __R) 2505 { 2506 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2507 (__v8df) __B, 2508 (__v8df) __W, 2509 (__mmask8) __U, __R); 2510 } 2511 2512 extern __inline __m512d 2513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2514 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2515 const int __R) 2516 { 2517 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2518 (__v8df) __B, 2519 (__v8df) 2520 _mm512_setzero_pd (), 2521 (__mmask8) __U, __R); 2522 } 2523 2524 extern __inline __m512 2525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2526 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R) 2527 { 2528 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2529 (__v16sf) __B, 2530 (__v16sf) 2531 _mm512_undefined_ps (), 2532 (__mmask16) -1, __R); 2533 } 2534 2535 extern __inline __m512 2536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2537 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2538 __m512 __B, const int __R) 2539 { 2540 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 
2541 (__v16sf) __B, 2542 (__v16sf) __W, 2543 (__mmask16) __U, __R); 2544 } 2545 2546 extern __inline __m512 2547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2548 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2549 { 2550 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2551 (__v16sf) __B, 2552 (__v16sf) 2553 _mm512_setzero_ps (), 2554 (__mmask16) __U, __R); 2555 } 2556 #else 2557 #define _mm512_add_round_pd(A, B, C) \ 2558 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2559 2560 #define _mm512_mask_add_round_pd(W, U, A, B, C) \ 2561 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C) 2562 2563 #define _mm512_maskz_add_round_pd(U, A, B, C) \ 2564 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2565 2566 #define _mm512_add_round_ps(A, B, C) \ 2567 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2568 2569 #define _mm512_mask_add_round_ps(W, U, A, B, C) \ 2570 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C) 2571 2572 #define _mm512_maskz_add_round_ps(U, A, B, C) \ 2573 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2574 2575 #define _mm512_sub_round_pd(A, B, C) \ 2576 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2577 2578 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \ 2579 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C) 2580 2581 #define _mm512_maskz_sub_round_pd(U, A, B, C) \ 2582 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2583 2584 #define _mm512_sub_round_ps(A, B, C) \ 2585 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2586 2587 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \ 2588 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C) 2589 2590 #define _mm512_maskz_sub_round_ps(U, A, B, C) \ 2591 (__m512)__builtin_ia32_subps512_mask(A, B, 
(__v16sf)_mm512_setzero_ps(), U, C) 2592 #endif 2593 2594 #ifdef __OPTIMIZE__ 2595 extern __inline __m512d 2596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2597 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R) 2598 { 2599 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2600 (__v8df) __B, 2601 (__v8df) 2602 _mm512_undefined_pd (), 2603 (__mmask8) -1, __R); 2604 } 2605 2606 extern __inline __m512d 2607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2608 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2609 __m512d __B, const int __R) 2610 { 2611 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2612 (__v8df) __B, 2613 (__v8df) __W, 2614 (__mmask8) __U, __R); 2615 } 2616 2617 extern __inline __m512d 2618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2619 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2620 const int __R) 2621 { 2622 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2623 (__v8df) __B, 2624 (__v8df) 2625 _mm512_setzero_pd (), 2626 (__mmask8) __U, __R); 2627 } 2628 2629 extern __inline __m512 2630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2631 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R) 2632 { 2633 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2634 (__v16sf) __B, 2635 (__v16sf) 2636 _mm512_undefined_ps (), 2637 (__mmask16) -1, __R); 2638 } 2639 2640 extern __inline __m512 2641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2642 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2643 __m512 __B, const int __R) 2644 { 2645 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2646 (__v16sf) __B, 2647 (__v16sf) __W, 2648 (__mmask16) __U, __R); 2649 } 2650 2651 extern __inline __m512 2652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2653 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, 
__m512 __B, const int __R) 2654 { 2655 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2656 (__v16sf) __B, 2657 (__v16sf) 2658 _mm512_setzero_ps (), 2659 (__mmask16) __U, __R); 2660 } 2661 2662 extern __inline __m512d 2663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2664 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R) 2665 { 2666 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2667 (__v8df) __V, 2668 (__v8df) 2669 _mm512_undefined_pd (), 2670 (__mmask8) -1, __R); 2671 } 2672 2673 extern __inline __m512d 2674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2675 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M, 2676 __m512d __V, const int __R) 2677 { 2678 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2679 (__v8df) __V, 2680 (__v8df) __W, 2681 (__mmask8) __U, __R); 2682 } 2683 2684 extern __inline __m512d 2685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2686 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V, 2687 const int __R) 2688 { 2689 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2690 (__v8df) __V, 2691 (__v8df) 2692 _mm512_setzero_pd (), 2693 (__mmask8) __U, __R); 2694 } 2695 2696 extern __inline __m512 2697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2698 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R) 2699 { 2700 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2701 (__v16sf) __B, 2702 (__v16sf) 2703 _mm512_undefined_ps (), 2704 (__mmask16) -1, __R); 2705 } 2706 2707 extern __inline __m512 2708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2709 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2710 __m512 __B, const int __R) 2711 { 2712 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2713 (__v16sf) __B, 2714 (__v16sf) __W, 2715 (__mmask16) __U, __R); 2716 } 2717 2718 extern __inline __m512 2719 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2720 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2721 { 2722 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2723 (__v16sf) __B, 2724 (__v16sf) 2725 _mm512_setzero_ps (), 2726 (__mmask16) __U, __R); 2727 } 2728 2729 extern __inline __m128d 2730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2731 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R) 2732 { 2733 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, 2734 (__v2df) __B, 2735 __R); 2736 } 2737 2738 extern __inline __m128d 2739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2740 _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 2741 __m128d __B, const int __R) 2742 { 2743 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, 2744 (__v2df) __B, 2745 (__v2df) __W, 2746 (__mmask8) __U, __R); 2747 } 2748 2749 extern __inline __m128d 2750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2751 _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 2752 const int __R) 2753 { 2754 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, 2755 (__v2df) __B, 2756 (__v2df) 2757 _mm_setzero_pd (), 2758 (__mmask8) __U, __R); 2759 } 2760 2761 extern __inline __m128 2762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2763 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R) 2764 { 2765 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, 2766 (__v4sf) __B, 2767 __R); 2768 } 2769 2770 extern __inline __m128 2771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2772 _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 2773 __m128 __B, const int __R) 2774 { 2775 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, 2776 (__v4sf) __B, 2777 (__v4sf) __W, 2778 (__mmask8) __U, __R); 2779 } 2780 2781 extern __inline __m128 2782 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 2783 _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 2784 const int __R) 2785 { 2786 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, 2787 (__v4sf) __B, 2788 (__v4sf) 2789 _mm_setzero_ps (), 2790 (__mmask8) __U, __R); 2791 } 2792 2793 extern __inline __m128d 2794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2795 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R) 2796 { 2797 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, 2798 (__v2df) __B, 2799 __R); 2800 } 2801 2802 extern __inline __m128d 2803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2804 _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 2805 __m128d __B, const int __R) 2806 { 2807 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, 2808 (__v2df) __B, 2809 (__v2df) __W, 2810 (__mmask8) __U, __R); 2811 } 2812 2813 extern __inline __m128d 2814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2815 _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 2816 const int __R) 2817 { 2818 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, 2819 (__v2df) __B, 2820 (__v2df) 2821 _mm_setzero_pd (), 2822 (__mmask8) __U, __R); 2823 } 2824 2825 extern __inline __m128 2826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2827 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R) 2828 { 2829 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, 2830 (__v4sf) __B, 2831 __R); 2832 } 2833 2834 extern __inline __m128 2835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2836 _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 2837 __m128 __B, const int __R) 2838 { 2839 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, 2840 (__v4sf) __B, 2841 (__v4sf) __W, 2842 (__mmask8) __U, __R); 2843 } 2844 2845 extern __inline __m128 2846 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 2847 _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 2848 const int __R) 2849 { 2850 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, 2851 (__v4sf) __B, 2852 (__v4sf) 2853 _mm_setzero_ps (), 2854 (__mmask8) __U, __R); 2855 } 2856 2857 #else 2858 #define _mm512_mul_round_pd(A, B, C) \ 2859 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2860 2861 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \ 2862 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C) 2863 2864 #define _mm512_maskz_mul_round_pd(U, A, B, C) \ 2865 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2866 2867 #define _mm512_mul_round_ps(A, B, C) \ 2868 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2869 2870 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \ 2871 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C) 2872 2873 #define _mm512_maskz_mul_round_ps(U, A, B, C) \ 2874 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2875 2876 #define _mm512_div_round_pd(A, B, C) \ 2877 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2878 2879 #define _mm512_mask_div_round_pd(W, U, A, B, C) \ 2880 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C) 2881 2882 #define _mm512_maskz_div_round_pd(U, A, B, C) \ 2883 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2884 2885 #define _mm512_div_round_ps(A, B, C) \ 2886 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2887 2888 #define _mm512_mask_div_round_ps(W, U, A, B, C) \ 2889 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C) 2890 2891 #define _mm512_maskz_div_round_ps(U, A, B, C) \ 2892 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2893 2894 #define _mm_mul_round_sd(A, B, C) \ 2895 (__m128d)__builtin_ia32_mulsd_round(A, B, C) 
2896 2897 #define _mm_mask_mul_round_sd(W, U, A, B, C) \ 2898 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C) 2899 2900 #define _mm_maskz_mul_round_sd(U, A, B, C) \ 2901 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 2902 2903 #define _mm_mul_round_ss(A, B, C) \ 2904 (__m128)__builtin_ia32_mulss_round(A, B, C) 2905 2906 #define _mm_mask_mul_round_ss(W, U, A, B, C) \ 2907 (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C) 2908 2909 #define _mm_maskz_mul_round_ss(U, A, B, C) \ 2910 (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 2911 2912 #define _mm_div_round_sd(A, B, C) \ 2913 (__m128d)__builtin_ia32_divsd_round(A, B, C) 2914 2915 #define _mm_mask_div_round_sd(W, U, A, B, C) \ 2916 (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C) 2917 2918 #define _mm_maskz_div_round_sd(U, A, B, C) \ 2919 (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 2920 2921 #define _mm_div_round_ss(A, B, C) \ 2922 (__m128)__builtin_ia32_divss_round(A, B, C) 2923 2924 #define _mm_mask_div_round_ss(W, U, A, B, C) \ 2925 (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C) 2926 2927 #define _mm_maskz_div_round_ss(U, A, B, C) \ 2928 (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 2929 2930 #endif 2931 2932 #ifdef __OPTIMIZE__ 2933 extern __inline __m512d 2934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2935 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R) 2936 { 2937 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2938 (__v8df) __B, 2939 (__v8df) 2940 _mm512_undefined_pd (), 2941 (__mmask8) -1, __R); 2942 } 2943 2944 extern __inline __m512d 2945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2946 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2947 __m512d __B, const int __R) 2948 { 2949 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2950 (__v8df) __B, 
2951 (__v8df) __W, 2952 (__mmask8) __U, __R); 2953 } 2954 2955 extern __inline __m512d 2956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2957 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2958 const int __R) 2959 { 2960 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2961 (__v8df) __B, 2962 (__v8df) 2963 _mm512_setzero_pd (), 2964 (__mmask8) __U, __R); 2965 } 2966 2967 extern __inline __m512 2968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2969 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R) 2970 { 2971 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2972 (__v16sf) __B, 2973 (__v16sf) 2974 _mm512_undefined_ps (), 2975 (__mmask16) -1, __R); 2976 } 2977 2978 extern __inline __m512 2979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2980 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2981 __m512 __B, const int __R) 2982 { 2983 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2984 (__v16sf) __B, 2985 (__v16sf) __W, 2986 (__mmask16) __U, __R); 2987 } 2988 2989 extern __inline __m512 2990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2991 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2992 { 2993 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2994 (__v16sf) __B, 2995 (__v16sf) 2996 _mm512_setzero_ps (), 2997 (__mmask16) __U, __R); 2998 } 2999 3000 extern __inline __m512d 3001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3002 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R) 3003 { 3004 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 3005 (__v8df) __B, 3006 (__v8df) 3007 _mm512_undefined_pd (), 3008 (__mmask8) -1, __R); 3009 } 3010 3011 extern __inline __m512d 3012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3013 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 
3014 __m512d __B, const int __R) 3015 { 3016 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 3017 (__v8df) __B, 3018 (__v8df) __W, 3019 (__mmask8) __U, __R); 3020 } 3021 3022 extern __inline __m512d 3023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3024 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3025 const int __R) 3026 { 3027 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 3028 (__v8df) __B, 3029 (__v8df) 3030 _mm512_setzero_pd (), 3031 (__mmask8) __U, __R); 3032 } 3033 3034 extern __inline __m512 3035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3036 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R) 3037 { 3038 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 3039 (__v16sf) __B, 3040 (__v16sf) 3041 _mm512_undefined_ps (), 3042 (__mmask16) -1, __R); 3043 } 3044 3045 extern __inline __m512 3046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3047 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 3048 __m512 __B, const int __R) 3049 { 3050 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 3051 (__v16sf) __B, 3052 (__v16sf) __W, 3053 (__mmask16) __U, __R); 3054 } 3055 3056 extern __inline __m512 3057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3058 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 3059 { 3060 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 3061 (__v16sf) __B, 3062 (__v16sf) 3063 _mm512_setzero_ps (), 3064 (__mmask16) __U, __R); 3065 } 3066 #else 3067 #define _mm512_max_round_pd(A, B, R) \ 3068 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R) 3069 3070 #define _mm512_mask_max_round_pd(W, U, A, B, R) \ 3071 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R) 3072 3073 #define _mm512_maskz_max_round_pd(U, A, B, R) \ 3074 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), 
U, R) 3075 3076 #define _mm512_max_round_ps(A, B, R) \ 3077 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_pd(), -1, R) 3078 3079 #define _mm512_mask_max_round_ps(W, U, A, B, R) \ 3080 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R) 3081 3082 #define _mm512_maskz_max_round_ps(U, A, B, R) \ 3083 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) 3084 3085 #define _mm512_min_round_pd(A, B, R) \ 3086 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R) 3087 3088 #define _mm512_mask_min_round_pd(W, U, A, B, R) \ 3089 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R) 3090 3091 #define _mm512_maskz_min_round_pd(U, A, B, R) \ 3092 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R) 3093 3094 #define _mm512_min_round_ps(A, B, R) \ 3095 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R) 3096 3097 #define _mm512_mask_min_round_ps(W, U, A, B, R) \ 3098 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R) 3099 3100 #define _mm512_maskz_min_round_ps(U, A, B, R) \ 3101 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) 3102 #endif 3103 3104 #ifdef __OPTIMIZE__ 3105 extern __inline __m512d 3106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3107 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R) 3108 { 3109 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 3110 (__v8df) __B, 3111 (__v8df) 3112 _mm512_undefined_pd (), 3113 (__mmask8) -1, __R); 3114 } 3115 3116 extern __inline __m512d 3117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3118 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 3119 __m512d __B, const int __R) 3120 { 3121 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 3122 (__v8df) __B, 3123 (__v8df) __W, 3124 (__mmask8) __U, __R); 3125 } 3126 3127 extern __inline __m512d 3128 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 3129 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3130 const int __R) 3131 { 3132 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 3133 (__v8df) __B, 3134 (__v8df) 3135 _mm512_setzero_pd (), 3136 (__mmask8) __U, __R); 3137 } 3138 3139 extern __inline __m512 3140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3141 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R) 3142 { 3143 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 3144 (__v16sf) __B, 3145 (__v16sf) 3146 _mm512_undefined_ps (), 3147 (__mmask16) -1, __R); 3148 } 3149 3150 extern __inline __m512 3151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3152 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 3153 __m512 __B, const int __R) 3154 { 3155 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 3156 (__v16sf) __B, 3157 (__v16sf) __W, 3158 (__mmask16) __U, __R); 3159 } 3160 3161 extern __inline __m512 3162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3163 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3164 const int __R) 3165 { 3166 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 3167 (__v16sf) __B, 3168 (__v16sf) 3169 _mm512_setzero_ps (), 3170 (__mmask16) __U, __R); 3171 } 3172 3173 extern __inline __m128d 3174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3175 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R) 3176 { 3177 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, 3178 (__v2df) __B, 3179 (__v2df) 3180 _mm_setzero_pd (), 3181 (__mmask8) -1, __R); 3182 } 3183 3184 extern __inline __m128d 3185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3186 _mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 3187 const int __R) 3188 { 3189 return (__m128d) 
__builtin_ia32_scalefsd_mask_round ((__v2df) __A, 3190 (__v2df) __B, 3191 (__v2df) __W, 3192 (__mmask8) __U, __R); 3193 } 3194 3195 extern __inline __m128d 3196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3197 _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 3198 const int __R) 3199 { 3200 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, 3201 (__v2df) __B, 3202 (__v2df) 3203 _mm_setzero_pd (), 3204 (__mmask8) __U, __R); 3205 } 3206 3207 extern __inline __m128 3208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3209 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R) 3210 { 3211 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, 3212 (__v4sf) __B, 3213 (__v4sf) 3214 _mm_setzero_ps (), 3215 (__mmask8) -1, __R); 3216 } 3217 3218 extern __inline __m128 3219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3220 _mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 3221 const int __R) 3222 { 3223 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, 3224 (__v4sf) __B, 3225 (__v4sf) __W, 3226 (__mmask8) __U, __R); 3227 } 3228 3229 extern __inline __m128 3230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3231 _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) 3232 { 3233 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, 3234 (__v4sf) __B, 3235 (__v4sf) 3236 _mm_setzero_ps (), 3237 (__mmask8) __U, __R); 3238 } 3239 #else 3240 #define _mm512_scalef_round_pd(A, B, C) \ 3241 ((__m512d) \ 3242 __builtin_ia32_scalefpd512_mask((A), (B), \ 3243 (__v8df) _mm512_undefined_pd(), \ 3244 -1, (C))) 3245 3246 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ 3247 ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C))) 3248 3249 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \ 3250 ((__m512d) \ 3251 __builtin_ia32_scalefpd512_mask((A), (B), \ 3252 
(__v8df) _mm512_setzero_pd(), \ 3253 (U), (C))) 3254 3255 #define _mm512_scalef_round_ps(A, B, C) \ 3256 ((__m512) \ 3257 __builtin_ia32_scalefps512_mask((A), (B), \ 3258 (__v16sf) _mm512_undefined_ps(), \ 3259 -1, (C))) 3260 3261 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ 3262 ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C))) 3263 3264 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \ 3265 ((__m512) \ 3266 __builtin_ia32_scalefps512_mask((A), (B), \ 3267 (__v16sf) _mm512_setzero_ps(), \ 3268 (U), (C))) 3269 3270 #define _mm_scalef_round_sd(A, B, C) \ 3271 ((__m128d) \ 3272 __builtin_ia32_scalefsd_mask_round ((A), (B), \ 3273 (__v2df) _mm_undefined_pd (), \ 3274 -1, (C))) 3275 3276 #define _mm_scalef_round_ss(A, B, C) \ 3277 ((__m128) \ 3278 __builtin_ia32_scalefss_mask_round ((A), (B), \ 3279 (__v4sf) _mm_undefined_ps (), \ 3280 -1, (C))) 3281 3282 #define _mm_mask_scalef_round_sd(W, U, A, B, C) \ 3283 ((__m128d) \ 3284 __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C))) 3285 3286 #define _mm_mask_scalef_round_ss(W, U, A, B, C) \ 3287 ((__m128) \ 3288 __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C))) 3289 3290 #define _mm_maskz_scalef_round_sd(U, A, B, C) \ 3291 ((__m128d) \ 3292 __builtin_ia32_scalefsd_mask_round ((A), (B), \ 3293 (__v2df) _mm_setzero_pd (), \ 3294 (U), (C))) 3295 3296 #define _mm_maskz_scalef_round_ss(U, A, B, C) \ 3297 ((__m128) \ 3298 __builtin_ia32_scalefss_mask_round ((A), (B), \ 3299 (__v4sf) _mm_setzero_ps (), \ 3300 (U), (C))) 3301 #endif 3302 3303 #ifdef __OPTIMIZE__ 3304 extern __inline __m512d 3305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3306 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3307 { 3308 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 3309 (__v8df) __B, 3310 (__v8df) __C, 3311 (__mmask8) -1, __R); 3312 } 3313 3314 extern __inline __m512d 3315 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 3316 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3317 __m512d __C, const int __R) 3318 { 3319 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 3320 (__v8df) __B, 3321 (__v8df) __C, 3322 (__mmask8) __U, __R); 3323 } 3324 3325 extern __inline __m512d 3326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3327 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3328 __mmask8 __U, const int __R) 3329 { 3330 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 3331 (__v8df) __B, 3332 (__v8df) __C, 3333 (__mmask8) __U, __R); 3334 } 3335 3336 extern __inline __m512d 3337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3338 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3339 __m512d __C, const int __R) 3340 { 3341 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 3342 (__v8df) __B, 3343 (__v8df) __C, 3344 (__mmask8) __U, __R); 3345 } 3346 3347 extern __inline __m512 3348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3349 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3350 { 3351 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 3352 (__v16sf) __B, 3353 (__v16sf) __C, 3354 (__mmask16) -1, __R); 3355 } 3356 3357 extern __inline __m512 3358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3359 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3360 __m512 __C, const int __R) 3361 { 3362 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 3363 (__v16sf) __B, 3364 (__v16sf) __C, 3365 (__mmask16) __U, __R); 3366 } 3367 3368 extern __inline __m512 3369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3370 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3371 __mmask16 __U, const int __R) 3372 { 3373 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 3374 (__v16sf) 
__B, 3375 (__v16sf) __C, 3376 (__mmask16) __U, __R); 3377 } 3378 3379 extern __inline __m512 3380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3381 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3382 __m512 __C, const int __R) 3383 { 3384 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 3385 (__v16sf) __B, 3386 (__v16sf) __C, 3387 (__mmask16) __U, __R); 3388 } 3389 3390 extern __inline __m512d 3391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3392 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3393 { 3394 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A, 3395 (__v8df) __B, 3396 (__v8df) __C, 3397 (__mmask8) -1, __R); 3398 } 3399 3400 extern __inline __m512d 3401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3402 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3403 __m512d __C, const int __R) 3404 { 3405 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A, 3406 (__v8df) __B, 3407 (__v8df) __C, 3408 (__mmask8) __U, __R); 3409 } 3410 3411 extern __inline __m512d 3412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3413 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 3414 __mmask8 __U, const int __R) 3415 { 3416 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 3417 (__v8df) __B, 3418 (__v8df) __C, 3419 (__mmask8) __U, __R); 3420 } 3421 3422 extern __inline __m512d 3423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3424 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3425 __m512d __C, const int __R) 3426 { 3427 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A, 3428 (__v8df) __B, 3429 (__v8df) __C, 3430 (__mmask8) __U, __R); 3431 } 3432 3433 extern __inline __m512 3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3435 _mm512_fmsub_round_ps (__m512 __A, __m512 
__B, __m512 __C, const int __R) 3436 { 3437 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A, 3438 (__v16sf) __B, 3439 (__v16sf) __C, 3440 (__mmask16) -1, __R); 3441 } 3442 3443 extern __inline __m512 3444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3445 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3446 __m512 __C, const int __R) 3447 { 3448 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A, 3449 (__v16sf) __B, 3450 (__v16sf) __C, 3451 (__mmask16) __U, __R); 3452 } 3453 3454 extern __inline __m512 3455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3456 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 3457 __mmask16 __U, const int __R) 3458 { 3459 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 3460 (__v16sf) __B, 3461 (__v16sf) __C, 3462 (__mmask16) __U, __R); 3463 } 3464 3465 extern __inline __m512 3466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3467 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3468 __m512 __C, const int __R) 3469 { 3470 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A, 3471 (__v16sf) __B, 3472 (__v16sf) __C, 3473 (__mmask16) __U, __R); 3474 } 3475 3476 extern __inline __m512d 3477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3478 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3479 { 3480 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3481 (__v8df) __B, 3482 (__v8df) __C, 3483 (__mmask8) -1, __R); 3484 } 3485 3486 extern __inline __m512d 3487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3488 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3489 __m512d __C, const int __R) 3490 { 3491 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3492 (__v8df) __B, 3493 (__v8df) __C, 3494 (__mmask8) __U, __R); 3495 } 3496 3497 extern 
__inline __m512d 3498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3499 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 3500 __mmask8 __U, const int __R) 3501 { 3502 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 3503 (__v8df) __B, 3504 (__v8df) __C, 3505 (__mmask8) __U, __R); 3506 } 3507 3508 extern __inline __m512d 3509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3510 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3511 __m512d __C, const int __R) 3512 { 3513 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3514 (__v8df) __B, 3515 (__v8df) __C, 3516 (__mmask8) __U, __R); 3517 } 3518 3519 extern __inline __m512 3520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3521 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3522 { 3523 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3524 (__v16sf) __B, 3525 (__v16sf) __C, 3526 (__mmask16) -1, __R); 3527 } 3528 3529 extern __inline __m512 3530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3531 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3532 __m512 __C, const int __R) 3533 { 3534 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3535 (__v16sf) __B, 3536 (__v16sf) __C, 3537 (__mmask16) __U, __R); 3538 } 3539 3540 extern __inline __m512 3541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3542 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 3543 __mmask16 __U, const int __R) 3544 { 3545 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 3546 (__v16sf) __B, 3547 (__v16sf) __C, 3548 (__mmask16) __U, __R); 3549 } 3550 3551 extern __inline __m512 3552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3553 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3554 __m512 __C, 
const int __R) 3555 { 3556 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3557 (__v16sf) __B, 3558 (__v16sf) __C, 3559 (__mmask16) __U, __R); 3560 } 3561 3562 extern __inline __m512d 3563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3564 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3565 { 3566 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3567 (__v8df) __B, 3568 -(__v8df) __C, 3569 (__mmask8) -1, __R); 3570 } 3571 3572 extern __inline __m512d 3573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3574 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3575 __m512d __C, const int __R) 3576 { 3577 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3578 (__v8df) __B, 3579 -(__v8df) __C, 3580 (__mmask8) __U, __R); 3581 } 3582 3583 extern __inline __m512d 3584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3585 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3586 __mmask8 __U, const int __R) 3587 { 3588 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 3589 (__v8df) __B, 3590 (__v8df) __C, 3591 (__mmask8) __U, __R); 3592 } 3593 3594 extern __inline __m512d 3595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3596 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3597 __m512d __C, const int __R) 3598 { 3599 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3600 (__v8df) __B, 3601 -(__v8df) __C, 3602 (__mmask8) __U, __R); 3603 } 3604 3605 extern __inline __m512 3606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3607 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3608 { 3609 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3610 (__v16sf) __B, 3611 -(__v16sf) __C, 3612 (__mmask16) -1, __R); 3613 } 3614 3615 extern __inline __m512 3616 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3617 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3618 __m512 __C, const int __R) 3619 { 3620 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3621 (__v16sf) __B, 3622 -(__v16sf) __C, 3623 (__mmask16) __U, __R); 3624 } 3625 3626 extern __inline __m512 3627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3628 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3629 __mmask16 __U, const int __R) 3630 { 3631 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 3632 (__v16sf) __B, 3633 (__v16sf) __C, 3634 (__mmask16) __U, __R); 3635 } 3636 3637 extern __inline __m512 3638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3639 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3640 __m512 __C, const int __R) 3641 { 3642 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3643 (__v16sf) __B, 3644 -(__v16sf) __C, 3645 (__mmask16) __U, __R); 3646 } 3647 3648 extern __inline __m512d 3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3650 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3651 { 3652 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 3653 (__v8df) __B, 3654 (__v8df) __C, 3655 (__mmask8) -1, __R); 3656 } 3657 3658 extern __inline __m512d 3659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3660 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3661 __m512d __C, const int __R) 3662 { 3663 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 3664 (__v8df) __B, 3665 (__v8df) __C, 3666 (__mmask8) __U, __R); 3667 } 3668 3669 extern __inline __m512d 3670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3671 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3672 __mmask8 __U, const int __R) 3673 { 3674 
return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A, 3675 (__v8df) __B, 3676 (__v8df) __C, 3677 (__mmask8) __U, __R); 3678 } 3679 3680 extern __inline __m512d 3681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3682 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3683 __m512d __C, const int __R) 3684 { 3685 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A, 3686 (__v8df) __B, 3687 (__v8df) __C, 3688 (__mmask8) __U, __R); 3689 } 3690 3691 extern __inline __m512 3692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3693 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3694 { 3695 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 3696 (__v16sf) __B, 3697 (__v16sf) __C, 3698 (__mmask16) -1, __R); 3699 } 3700 3701 extern __inline __m512 3702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3703 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3704 __m512 __C, const int __R) 3705 { 3706 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 3707 (__v16sf) __B, 3708 (__v16sf) __C, 3709 (__mmask16) __U, __R); 3710 } 3711 3712 extern __inline __m512 3713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3714 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3715 __mmask16 __U, const int __R) 3716 { 3717 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A, 3718 (__v16sf) __B, 3719 (__v16sf) __C, 3720 (__mmask16) __U, __R); 3721 } 3722 3723 extern __inline __m512 3724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3725 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3726 __m512 __C, const int __R) 3727 { 3728 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A, 3729 (__v16sf) __B, 3730 (__v16sf) __C, 3731 (__mmask16) __U, __R); 3732 } 3733 3734 extern __inline __m512d 3735 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 3736 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3737 { 3738 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 3739 (__v8df) __B, 3740 (__v8df) __C, 3741 (__mmask8) -1, __R); 3742 } 3743 3744 extern __inline __m512d 3745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3746 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3747 __m512d __C, const int __R) 3748 { 3749 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 3750 (__v8df) __B, 3751 (__v8df) __C, 3752 (__mmask8) __U, __R); 3753 } 3754 3755 extern __inline __m512d 3756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3757 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 3758 __mmask8 __U, const int __R) 3759 { 3760 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 3761 (__v8df) __B, 3762 (__v8df) __C, 3763 (__mmask8) __U, __R); 3764 } 3765 3766 extern __inline __m512d 3767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3768 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3769 __m512d __C, const int __R) 3770 { 3771 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A, 3772 (__v8df) __B, 3773 (__v8df) __C, 3774 (__mmask8) __U, __R); 3775 } 3776 3777 extern __inline __m512 3778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3779 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3780 { 3781 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 3782 (__v16sf) __B, 3783 (__v16sf) __C, 3784 (__mmask16) -1, __R); 3785 } 3786 3787 extern __inline __m512 3788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3789 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3790 __m512 __C, const int __R) 3791 { 3792 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) 
__A, 3793 (__v16sf) __B, 3794 (__v16sf) __C, 3795 (__mmask16) __U, __R); 3796 } 3797 3798 extern __inline __m512 3799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3800 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 3801 __mmask16 __U, const int __R) 3802 { 3803 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 3804 (__v16sf) __B, 3805 (__v16sf) __C, 3806 (__mmask16) __U, __R); 3807 } 3808 3809 extern __inline __m512 3810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3811 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3812 __m512 __C, const int __R) 3813 { 3814 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A, 3815 (__v16sf) __B, 3816 (__v16sf) __C, 3817 (__mmask16) __U, __R); 3818 } 3819 #else 3820 #define _mm512_fmadd_round_pd(A, B, C, R) \ 3821 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R) 3822 3823 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ 3824 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R) 3825 3826 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ 3827 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R) 3828 3829 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ 3830 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R) 3831 3832 #define _mm512_fmadd_round_ps(A, B, C, R) \ 3833 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R) 3834 3835 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ 3836 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R) 3837 3838 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ 3839 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R) 3840 3841 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ 3842 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R) 3843 3844 #define _mm512_fmsub_round_pd(A, B, C, R) \ 3845 (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, -1, R) 3846 3847 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ 3848 
(__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, U, R) 3849 3850 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ 3851 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R) 3852 3853 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ 3854 (__m512d)__builtin_ia32_vfmsubpd512_maskz(A, B, C, U, R) 3855 3856 #define _mm512_fmsub_round_ps(A, B, C, R) \ 3857 (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, -1, R) 3858 3859 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ 3860 (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, U, R) 3861 3862 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ 3863 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R) 3864 3865 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ 3866 (__m512)__builtin_ia32_vfmsubps512_maskz(A, B, C, U, R) 3867 3868 #define _mm512_fmaddsub_round_pd(A, B, C, R) \ 3869 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R) 3870 3871 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ 3872 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R) 3873 3874 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ 3875 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R) 3876 3877 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ 3878 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R) 3879 3880 #define _mm512_fmaddsub_round_ps(A, B, C, R) \ 3881 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R) 3882 3883 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ 3884 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R) 3885 3886 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ 3887 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R) 3888 3889 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ 3890 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R) 3891 3892 #define _mm512_fmsubadd_round_pd(A, B, C, R) \ 3893 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R) 3894 3895 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, 
R) \ 3896 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R) 3897 3898 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ 3899 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R) 3900 3901 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ 3902 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R) 3903 3904 #define _mm512_fmsubadd_round_ps(A, B, C, R) \ 3905 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R) 3906 3907 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ 3908 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R) 3909 3910 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ 3911 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R) 3912 3913 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ 3914 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R) 3915 3916 #define _mm512_fnmadd_round_pd(A, B, C, R) \ 3917 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, -1, R) 3918 3919 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ 3920 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R) 3921 3922 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ 3923 (__m512d)__builtin_ia32_vfnmaddpd512_mask3(A, B, C, U, R) 3924 3925 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ 3926 (__m512d)__builtin_ia32_vfnmaddpd512_maskz(A, B, C, U, R) 3927 3928 #define _mm512_fnmadd_round_ps(A, B, C, R) \ 3929 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, -1, R) 3930 3931 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ 3932 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R) 3933 3934 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ 3935 (__m512)__builtin_ia32_vfnmaddps512_mask3(A, B, C, U, R) 3936 3937 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ 3938 (__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R) 3939 3940 #define _mm512_fnmsub_round_pd(A, B, C, R) \ 3941 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, -1, R) 3942 3943 #define 
_mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ 3944 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R) 3945 3946 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ 3947 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R) 3948 3949 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ 3950 (__m512d)__builtin_ia32_vfnmsubpd512_maskz(A, B, C, U, R) 3951 3952 #define _mm512_fnmsub_round_ps(A, B, C, R) \ 3953 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, -1, R) 3954 3955 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ 3956 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R) 3957 3958 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ 3959 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R) 3960 3961 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ 3962 (__m512)__builtin_ia32_vfnmsubps512_maskz(A, B, C, U, R) 3963 #endif 3964 3965 extern __inline __m512i 3966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3967 _mm512_abs_epi64 (__m512i __A) 3968 { 3969 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3970 (__v8di) 3971 _mm512_undefined_epi32 (), 3972 (__mmask8) -1); 3973 } 3974 3975 extern __inline __m512i 3976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3977 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 3978 { 3979 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3980 (__v8di) __W, 3981 (__mmask8) __U); 3982 } 3983 3984 extern __inline __m512i 3985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3986 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) 3987 { 3988 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3989 (__v8di) 3990 _mm512_setzero_si512 (), 3991 (__mmask8) __U); 3992 } 3993 3994 extern __inline __m512i 3995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3996 _mm512_abs_epi32 (__m512i __A) 3997 { 3998 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3999 (__v16si) 4000 
/* Tail of a masked 512-bit integer intrinsic whose definition begins before
   this chunk: completes the builtin call with an undefined destination and
   an all-ones (write-everything) mask.  */
						      _mm512_undefined_epi32 (),
						      (__mmask16) -1);
}

/* NOTE(review): the intrinsics below follow the usual AVX-512 triple
   pattern visible throughout this file:
     - plain form: destination is _mm512_undefined_*() and the mask is
       all-ones, so every element is produced;
     - _mask_ form: takes a pass-through vector (__W/__O) and a write mask
       (__U/__M) that are forwarded to the builtin;
     - _maskz_ form: the pass-through vector is _mm512_setzero_*().
   The exact merge/zero semantics are implemented by the builtin itself.  */

/* Masked absolute value of packed 32-bit integers, merging from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* Zero-masked absolute value of packed 32-bit integers.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}

/* Broadcast the low single-precision element of __A to all 16 lanes.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastss_ps (__m128 __A)
{
  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
						 (__v16sf) __O, __M);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
{
  return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 __M);
}

/* Broadcast the low double-precision element of __A to all 8 lanes.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastsd_pd (__m128d __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
						  (__v8df) __O, __M);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  __M);
}

/* Broadcast the low 32-bit integer element of __A to all 16 lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
						  (__v16si) __O, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Broadcast a 32-bit scalar (GPR source, hence the _gpr_ builtin) to all
   16 lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi32 (int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
							   (__v16si)
							   _mm512_undefined_epi32 (),
							   (__mmask16)(-1));
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
							   __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
{
  return (__m512i)
	 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
						 (__v16si) _mm512_setzero_si512 (),
						 __M);
}

/* Broadcast the low 64-bit integer element of __A to all 8 lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
						  (__v8di) __O, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Broadcast a 64-bit scalar (GPR source) to all 8 lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi64 (long long __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
							   (__v8di)
							   _mm512_undefined_epi32 (),
							   (__mmask8)(-1));
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
							   __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
{
  return (__m512i)
	 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
						 (__v8di) _mm512_setzero_si512 (),
						 __M);
}

/* Broadcast a 128-bit block of four floats to all four 128-bit lanes.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f32x4 (__m128 __A)
{
  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
						     (__v16sf)
						     _mm512_undefined_ps (),
						     (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
						     (__v16sf) __O,
						     __M);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
{
  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
						     (__v16sf)
						     _mm512_setzero_ps (),
						     __M);
}

/* Broadcast a 128-bit block of four 32-bit integers to all four lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i32x4 (__m128i __A)
{
  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
						      (__v16si)
						      _mm512_undefined_epi32 (),
						      (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
						      (__v16si) __O,
						      __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
{
  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
						      (__v16si)
						      _mm512_setzero_si512 (),
						      __M);
}

/* Broadcast a 256-bit block of four doubles to both halves.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f64x4 (__m256d __A)
{
  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
						      (__v8df)
						      _mm512_undefined_pd (),
						      (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
{
  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
						      (__v8df) __O,
						      __M);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
{
  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
						      (__v8df)
						      _mm512_setzero_pd (),
						      __M);
}

/* Broadcast a 256-bit block of four 64-bit integers to both halves.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i64x4 (__m256i __A)
{
  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
						      (__v8di)
						      _mm512_undefined_epi32 (),
						      (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
{
  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
						      (__v8di) __O,
						      __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
{
  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
						      (__v8di)
						      _mm512_setzero_si512 (),
						      __M);
}

/* Symbolic names for the 8-bit immediate taken by _mm512_shuffle_epi32
   below.  Each of the four letters encodes a 2-bit selector (A=0, B=1,
   C=2, D=3); the name spells the selectors from the most significant
   2-bit field down to the least, so _MM_PERM_ABCD == 0x1B, etc.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;

/* The shuffle intrinsics take an immediate operand, which the builtin
   requires to be a compile-time constant.  Without optimization the
   inline functions would not constant-fold their argument, so macro
   forms are provided instead; the two variants must stay in lockstep.  */
#ifdef __OPTIMIZE__
/* Shuffle 32-bit elements within each 128-bit lane per __mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
						  __mask,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			   _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
						  __mask,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
{
  return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
						  __mask,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* Shuffle 128-bit lanes selected from __A and __B per __imm
   (64-bit-integer element view).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di)
						   _mm512_undefined_epi32 (),
						   (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
			   __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di) __W,
						   (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
			    const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
						   (__v8di) __B, __imm,
						   (__v8di)
						   _mm512_setzero_si512 (),
						   (__mmask8) __U);
}

/* As above, 32-bit-integer element view.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B,
						   __imm,
						   (__v16si)
						   _mm512_undefined_epi32 (),
						   (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
			   __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B,
						   __imm,
						   (__v16si) __W,
						   (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
			    const int __imm)
{
  return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
						   (__v16si) __B,
						   __imm,
						   (__v16si)
						   _mm512_setzero_si512 (),
						   (__mmask16) __U);
}

/* As above, double-precision element view.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df)
						   _mm512_undefined_pd (),
						   (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
			   __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df) __W,
						   (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
			    const int __imm)
{
  return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
						   (__v8df) __B, __imm,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}

/* As above, single-precision element view.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
			   __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
			    const int __imm)
{
  return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
						  (__v16sf) __B, __imm,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}

#else
/* Macro fallbacks (used at -O0); must mirror the inline forms above.  */
#define _mm512_shuffle_epi32(X, C)                                      \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_epi32(W, U, X, C)                           \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_epi32(U, X, C)                             \
  ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_i64x2(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X),      \
    (__v8di)(__m512i)(Y), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))

#define _mm512_shuffle_i32x4(X, Y, C)                                   \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C)                        \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C)                          \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X),     \
    (__v16si)(__m512i)(Y), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_shuffle_f64x2(X, Y, C)                                   \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_undefined_pd(),\
    (__mmask8)-1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C)                        \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C)                          \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X),      \
    (__v8df)(__m512d)(Y), (int)(C),\
    (__v8df)(__m512d)_mm512_setzero_pd(),\
    (__mmask8)(U)))

#define _mm512_shuffle_f32x4(X, Y, C)                                   \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_undefined_ps(),\
    (__mmask16)-1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C)                        \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C)                          \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X),       \
    (__v16sf)(__m512)(Y), (int)(C),\
    (__v16sf)(__m512)_mm512_setzero_ps(),\
    (__mmask16)(U)))
#endif

/* Variable-count left rotate of 32-bit elements (counts taken per
   element from __B).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* Variable-count right rotate of 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* Variable-count left rotate of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}

/* Variable-count right rotate of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}

/* Conversions taking an explicit rounding/SAE control __R, which the
   builtin requires to be a compile-time constant (hence the macro
   fallbacks when not optimizing).  */
#ifdef __OPTIMIZE__
/* Convert 8 doubles to 8 signed 32-bit ints with truncation.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si) __W,
						     (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U, __R);
}

/* Convert 8 doubles to 8 unsigned 32-bit ints with truncation.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si)
						      _mm256_undefined_si256 (),
						      (__mmask8) -1, __R);
}
/* Masked form of the truncating double -> unsigned 32-bit conversion;
   continues the __OPTIMIZE__ block opened just above.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si) __W,
						      (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
						      (__v8si)
						      _mm256_setzero_si256 (),
						      (__mmask8) __U, __R);
}
#else
/* Macro fallbacks (used at -O0); must mirror the inline forms above.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvtt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#endif

#ifdef __OPTIMIZE__
/* Convert 8 doubles to 8 signed 32-bit ints, rounding per __R.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si)
						    _mm256_undefined_si256 (),
						    (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
						    (__v8si)
						    _mm256_setzero_si256 (),
						    (__mmask8) __U, __R);
}

/* Convert 8 doubles to 8 unsigned 32-bit ints, rounding per __R.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si) __W,
						     (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
						     (__v8si)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U, __R);
}
#else
#define _mm512_cvt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))

#define _mm512_cvt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#endif

#ifdef __OPTIMIZE__
/* Convert 16 floats to 16 signed 32-bit ints with truncation.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_undefined_epi32 (),
						     (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si) __W,
						     (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U, __R);
}

/* Convert 16 floats to 16 unsigned 32-bit ints with truncation.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si)
						      _mm512_undefined_epi32 (),
						      (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
				const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si) __W,
						      (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
						      (__v16si)
						      _mm512_setzero_si512 (),
						      (__mmask16) __U, __R);
}
#else
#define _mm512_cvtt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))

#define _mm512_cvtt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif

#ifdef __OPTIMIZE__
/* Convert 16 floats to 16 signed 32-bit ints, rounding per __R.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si)
						    _mm512_undefined_epi32 (),
						    (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    (__mmask16) __U, __R);
}

/* Convert 16 floats to 16 unsigned 32-bit ints, rounding per __R.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_undefined_epi32 (),
						     (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
			       const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si) __W,
						     (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U, __R);
}
#else
#define _mm512_cvt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))

/* The following macro continues past the end of this chunk.  */
#define _mm512_cvt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask(A,
(__v16si)_mm512_undefined_epi32 (), -1, B)) 5099 5100 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \ 5101 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B)) 5102 5103 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \ 5104 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) 5105 #endif 5106 5107 extern __inline __m128d 5108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5109 _mm_cvtu32_sd (__m128d __A, unsigned __B) 5110 { 5111 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); 5112 } 5113 5114 #ifdef __x86_64__ 5115 #ifdef __OPTIMIZE__ 5116 extern __inline __m128d 5117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5118 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R) 5119 { 5120 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R); 5121 } 5122 5123 extern __inline __m128d 5124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5125 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R) 5126 { 5127 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); 5128 } 5129 5130 extern __inline __m128d 5131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5132 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R) 5133 { 5134 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); 5135 } 5136 #else 5137 #define _mm_cvt_roundu64_sd(A, B, C) \ 5138 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C) 5139 5140 #define _mm_cvt_roundi64_sd(A, B, C) \ 5141 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C) 5142 5143 #define _mm_cvt_roundsi64_sd(A, B, C) \ 5144 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C) 5145 #endif 5146 5147 #endif 5148 5149 #ifdef __OPTIMIZE__ 5150 extern __inline __m128 5151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5152 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R) 5153 { 5154 return (__m128) 
__builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R); 5155 } 5156 5157 extern __inline __m128 5158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5159 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R) 5160 { 5161 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); 5162 } 5163 5164 extern __inline __m128 5165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5166 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R) 5167 { 5168 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); 5169 } 5170 #else 5171 #define _mm_cvt_roundu32_ss(A, B, C) \ 5172 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C) 5173 5174 #define _mm_cvt_roundi32_ss(A, B, C) \ 5175 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C) 5176 5177 #define _mm_cvt_roundsi32_ss(A, B, C) \ 5178 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C) 5179 #endif 5180 5181 #ifdef __x86_64__ 5182 #ifdef __OPTIMIZE__ 5183 extern __inline __m128 5184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5185 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R) 5186 { 5187 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R); 5188 } 5189 5190 extern __inline __m128 5191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5192 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R) 5193 { 5194 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); 5195 } 5196 5197 extern __inline __m128 5198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5199 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R) 5200 { 5201 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); 5202 } 5203 #else 5204 #define _mm_cvt_roundu64_ss(A, B, C) \ 5205 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C) 5206 5207 #define _mm_cvt_roundi64_ss(A, B, C) \ 5208 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C) 5209 5210 #define _mm_cvt_roundsi64_ss(A, B, C) \ 5211 
(__m128)__builtin_ia32_cvtsi2ss64(A, B, C) 5212 #endif 5213 5214 #endif 5215 5216 extern __inline __m128i 5217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5218 _mm512_cvtepi32_epi8 (__m512i __A) 5219 { 5220 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 5221 (__v16qi) 5222 _mm_undefined_si128 (), 5223 (__mmask16) -1); 5224 } 5225 5226 extern __inline void 5227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5228 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 5229 { 5230 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 5231 } 5232 5233 extern __inline __m128i 5234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5235 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 5236 { 5237 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 5238 (__v16qi) __O, __M); 5239 } 5240 5241 extern __inline __m128i 5242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5243 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 5244 { 5245 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 5246 (__v16qi) 5247 _mm_setzero_si128 (), 5248 __M); 5249 } 5250 5251 extern __inline __m128i 5252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5253 _mm512_cvtsepi32_epi8 (__m512i __A) 5254 { 5255 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 5256 (__v16qi) 5257 _mm_undefined_si128 (), 5258 (__mmask16) -1); 5259 } 5260 5261 extern __inline void 5262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5263 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 5264 { 5265 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 5266 } 5267 5268 extern __inline __m128i 5269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5270 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 5271 { 5272 
return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 5273 (__v16qi) __O, __M); 5274 } 5275 5276 extern __inline __m128i 5277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5278 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) 5279 { 5280 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 5281 (__v16qi) 5282 _mm_setzero_si128 (), 5283 __M); 5284 } 5285 5286 extern __inline __m128i 5287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5288 _mm512_cvtusepi32_epi8 (__m512i __A) 5289 { 5290 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 5291 (__v16qi) 5292 _mm_undefined_si128 (), 5293 (__mmask16) -1); 5294 } 5295 5296 extern __inline void 5297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5298 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 5299 { 5300 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 5301 } 5302 5303 extern __inline __m128i 5304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5305 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 5306 { 5307 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 5308 (__v16qi) __O, 5309 __M); 5310 } 5311 5312 extern __inline __m128i 5313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5314 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 5315 { 5316 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 5317 (__v16qi) 5318 _mm_setzero_si128 (), 5319 __M); 5320 } 5321 5322 extern __inline __m256i 5323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5324 _mm512_cvtepi32_epi16 (__m512i __A) 5325 { 5326 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 5327 (__v16hi) 5328 _mm256_undefined_si256 (), 5329 (__mmask16) -1); 5330 } 5331 5332 extern __inline void 5333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5334 
_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 5335 { 5336 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 5337 } 5338 5339 extern __inline __m256i 5340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5341 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 5342 { 5343 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 5344 (__v16hi) __O, __M); 5345 } 5346 5347 extern __inline __m256i 5348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5349 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 5350 { 5351 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 5352 (__v16hi) 5353 _mm256_setzero_si256 (), 5354 __M); 5355 } 5356 5357 extern __inline __m256i 5358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5359 _mm512_cvtsepi32_epi16 (__m512i __A) 5360 { 5361 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 5362 (__v16hi) 5363 _mm256_undefined_si256 (), 5364 (__mmask16) -1); 5365 } 5366 5367 extern __inline void 5368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5369 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 5370 { 5371 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 5372 } 5373 5374 extern __inline __m256i 5375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5376 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 5377 { 5378 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 5379 (__v16hi) __O, __M); 5380 } 5381 5382 extern __inline __m256i 5383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5384 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 5385 { 5386 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 5387 (__v16hi) 5388 _mm256_setzero_si256 (), 5389 __M); 5390 } 5391 5392 extern __inline __m256i 5393 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 5394 _mm512_cvtusepi32_epi16 (__m512i __A) 5395 { 5396 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 5397 (__v16hi) 5398 _mm256_undefined_si256 (), 5399 (__mmask16) -1); 5400 } 5401 5402 extern __inline void 5403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5404 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 5405 { 5406 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 5407 } 5408 5409 extern __inline __m256i 5410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5411 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 5412 { 5413 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 5414 (__v16hi) __O, 5415 __M); 5416 } 5417 5418 extern __inline __m256i 5419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5420 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 5421 { 5422 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 5423 (__v16hi) 5424 _mm256_setzero_si256 (), 5425 __M); 5426 } 5427 5428 extern __inline __m256i 5429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5430 _mm512_cvtepi64_epi32 (__m512i __A) 5431 { 5432 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 5433 (__v8si) 5434 _mm256_undefined_si256 (), 5435 (__mmask8) -1); 5436 } 5437 5438 extern __inline void 5439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5440 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 5441 { 5442 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 5443 } 5444 5445 extern __inline __m256i 5446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5447 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 5448 { 5449 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 5450 (__v8si) __O, __M); 5451 } 5452 5453 
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
						  (__v8si)
						  _mm256_setzero_si256 (),
						  __M);
}

/* Signed-saturating (pmovsqd) 64-bit to 32-bit down-converts.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
						   (__v8si)
						   _mm256_undefined_si256 (),
						   (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
						   (__v8si) __O, __M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
						   (__v8si)
						   _mm256_setzero_si256 (),
						   __M);
}

/* Unsigned-saturating (pmovusqd) 64-bit to 32-bit down-converts.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
						    (__v8si)
						    _mm256_undefined_si256 (),
						    (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
						    (__v8si) __O, __M);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
						    (__v8si)
						    _mm256_setzero_si256 (),
						    __M);
}

/* Down-convert 8 x 64-bit elements to 16-bit (pmovqw family):
   truncating, signed-saturating (pmovsqw) and unsigned-saturating
   (pmovusqw) variants, each with store-to-memory forms.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
						  (__v8hi)
						  _mm_undefined_si128 (),
						  (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
						  (__v8hi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
						  (__v8hi)
						  _mm_setzero_si128 (),
						  __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
						   (__v8hi)
						   _mm_undefined_si128 (),
						   (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
						   (__v8hi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
						   (__v8hi)
						   _mm_setzero_si128 (),
						   __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
						    (__v8hi)
						    _mm_undefined_si128 (),
						    (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
						    (__v8hi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
						    (__v8hi)
						    _mm_setzero_si128 (),
						    __M);
}

/* Down-convert 8 x 64-bit elements to 8-bit (pmovqb family).  Note the
   result occupies only the low 8 bytes of the __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
						  (__v16qi)
						  _mm_undefined_si128 (),
						  (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
						  (__v16qi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
						  (__v16qi)
						  _mm_setzero_si128 (),
						  __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
						   (__v16qi)
						   _mm_undefined_si128 (),
						   (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
						   (__v16qi) __O, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
						   (__v16qi)
						   _mm_setzero_si128 (),
						   __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
						    (__v16qi)
						    _mm_undefined_si128 (),
						    (__mmask8) -1);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
						    (__v16qi) __O,
						    __M);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
						    (__v16qi)
						    _mm_setzero_si128 (),
						    __M);
}

/* Widen 8 x signed/unsigned 32-bit integers to double precision.
   These are exact conversions, so no rounding operand is taken.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_pd (__m256i __A)
{
  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
						    (__v8df) __W,
						    (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu32_pd (__m256i __A)
{
  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
						     (__v8df)
						     _mm512_undefined_pd (),
						     (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
						     (__v8df) __W,
						     (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

#ifdef __OPTIMIZE__
/* Signed/unsigned 32-bit integers to single precision, with rounding
   control __R; inline forms require __OPTIMIZE__ so __R stays a
   compile-time constant.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf) __W,
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
			       const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U, __R);
}

#else
#define _mm512_cvt_roundepi32_ps(A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)

#define _mm512_cvt_roundepu32_ps(A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
#endif

#ifdef __OPTIMIZE__
/* Extract a 256-bit or 128-bit lane selected by the immediate __imm
   from a 512-bit vector; inline forms need __imm to be a constant.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extractf64x4_pd (__m512d __A, const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
						     __imm,
						     (__v4df)
						     _mm256_undefined_pd (),
						     (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
			     const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
						     __imm,
						     (__v4df) __W,
						     (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
						     __imm,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extractf32x4_ps (__m512 __A, const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
						    __imm,
						    (__v4sf)
						    _mm_undefined_ps (),
						    (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
			     const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
						    __imm,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
						    __imm,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
						     __imm,
						     (__v4di)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
				const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
						     __imm,
						     (__v4di) __W,
						     (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
						     __imm,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
						     __imm,
						     (__v4si)
						     _mm_undefined_si128 (),
						     (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
				const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
						     __imm,
						     (__v4si) __W,
						     (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
						     __imm,
						     (__v4si)
						     _mm_setzero_si128 (),
						     (__mmask8) __U);
}
#else

#define _mm512_extractf64x4_pd(X, C)                                    \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
    (int) (C),\
    (__v4df)(__m256d)_mm256_undefined_pd(),\
    (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, X, C)                         \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
    (int) (C),\
    (__v4df)(__m256d)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, X, C)                           \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X),   \
    (int) (C),\
    (__v4df)(__m256d)_mm256_setzero_pd(),\
    (__mmask8)(U)))

#define _mm512_extractf32x4_ps(X, C)                                    \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
    (int) (C),\
    (__v4sf)(__m128)_mm_undefined_ps(),\
    (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, X, C)                         \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
    (int) (C),\
    (__v4sf)(__m128)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, X, C)                           \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X),    \
    (int) (C),\
    (__v4sf)(__m128)_mm_setzero_ps(),\
    (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(X, C)                                 \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
    (int) (C),\
    (__v4di)(__m256i)_mm256_undefined_si256 (),\
    (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, X, C)                      \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
    (int) (C),\
    (__v4di)(__m256i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, X, C)                        \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X),   \
    (int) (C),\
    (__v4di)(__m256i)_mm256_setzero_si256 (),\
    (__mmask8)(U)))

#define _mm512_extracti32x4_epi32(X, C)                                 \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
    (int) (C),\
    (__v4si)(__m128i)_mm_undefined_si128 (),\
    (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, X, C)                      \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
    (int) (C),\
    (__v4si)(__m128i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, X, C)                        \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X),  \
    (int) (C),\
    (__v4si)(__m128i)_mm_setzero_si128 (),\
    (__mmask8)(U)))
#endif

#ifdef __OPTIMIZE__
/* Insert a 128-bit lane into a 512-bit vector at the position selected
   by __imm.  The unmasked forms pass __A as the pass-through operand
   with an all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
						    (__v4si) __B,
						    __imm,
						    (__v16si) __A, -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
						   (__v4sf) __B,
						   __imm,
						   (__v16sf) __A, -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
						    (__v4di) __B,
						    __imm,
(__v8di) 6119 _mm512_undefined_epi32 (), 6120 (__mmask8) -1); 6121 } 6122 6123 extern __inline __m512i 6124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6125 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A, 6126 __m256i __B, const int __imm) 6127 { 6128 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, 6129 (__v4di) __B, 6130 __imm, 6131 (__v8di) __W, 6132 (__mmask8) __U); 6133 } 6134 6135 extern __inline __m512i 6136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6137 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B, 6138 const int __imm) 6139 { 6140 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, 6141 (__v4di) __B, 6142 __imm, 6143 (__v8di) 6144 _mm512_setzero_si512 (), 6145 (__mmask8) __U); 6146 } 6147 6148 extern __inline __m512d 6149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6150 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm) 6151 { 6152 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, 6153 (__v4df) __B, 6154 __imm, 6155 (__v8df) 6156 _mm512_undefined_pd (), 6157 (__mmask8) -1); 6158 } 6159 6160 extern __inline __m512d 6161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6162 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A, 6163 __m256d __B, const int __imm) 6164 { 6165 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, 6166 (__v4df) __B, 6167 __imm, 6168 (__v8df) __W, 6169 (__mmask8) __U); 6170 } 6171 6172 extern __inline __m512d 6173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6174 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B, 6175 const int __imm) 6176 { 6177 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, 6178 (__v4df) __B, 6179 __imm, 6180 (__v8df) 6181 _mm512_setzero_pd (), 6182 (__mmask8) __U); 6183 } 6184 #else 6185 #define _mm512_insertf32x4(X, Y, C) \ 6186 ((__m512) 
__builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 6187 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1))) 6188 6189 #define _mm512_inserti32x4(X, Y, C) \ 6190 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 6191 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1))) 6192 6193 #define _mm512_insertf64x4(X, Y, C) \ 6194 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ 6195 (__v4df)(__m256d) (Y), (int) (C), \ 6196 (__v8df)(__m512d)_mm512_undefined_pd(), \ 6197 (__mmask8)-1)) 6198 6199 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \ 6200 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ 6201 (__v4df)(__m256d) (Y), (int) (C), \ 6202 (__v8df)(__m512d)(W), \ 6203 (__mmask8)(U))) 6204 6205 #define _mm512_maskz_insertf64x4(U, X, Y, C) \ 6206 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ 6207 (__v4df)(__m256d) (Y), (int) (C), \ 6208 (__v8df)(__m512d)_mm512_setzero_pd(), \ 6209 (__mmask8)(U))) 6210 6211 #define _mm512_inserti64x4(X, Y, C) \ 6212 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ 6213 (__v4di)(__m256i) (Y), (int) (C), \ 6214 (__v8di)(__m512i)_mm512_undefined_epi32 (), \ 6215 (__mmask8)-1)) 6216 6217 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \ 6218 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ 6219 (__v4di)(__m256i) (Y), (int) (C),\ 6220 (__v8di)(__m512i)(W),\ 6221 (__mmask8)(U))) 6222 6223 #define _mm512_maskz_inserti64x4(U, X, Y, C) \ 6224 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ 6225 (__v4di)(__m256i) (Y), (int) (C), \ 6226 (__v8di)(__m512i)_mm512_setzero_si512 (), \ 6227 (__mmask8)(U))) 6228 #endif 6229 6230 extern __inline __m512d 6231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6232 _mm512_loadu_pd (void const *__P) 6233 { 6234 return *(__m512d_u *)__P; 6235 } 6236 6237 extern __inline __m512d 6238 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 6239 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) 6240 { 6241 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, 6242 (__v8df) __W, 6243 (__mmask8) __U); 6244 } 6245 6246 extern __inline __m512d 6247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6248 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P) 6249 { 6250 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, 6251 (__v8df) 6252 _mm512_setzero_pd (), 6253 (__mmask8) __U); 6254 } 6255 6256 extern __inline void 6257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6258 _mm512_storeu_pd (void *__P, __m512d __A) 6259 { 6260 *(__m512d_u *)__P = __A; 6261 } 6262 6263 extern __inline void 6264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6265 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A) 6266 { 6267 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A, 6268 (__mmask8) __U); 6269 } 6270 6271 extern __inline __m512 6272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6273 _mm512_loadu_ps (void const *__P) 6274 { 6275 return *(__m512_u *)__P; 6276 } 6277 6278 extern __inline __m512 6279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6280 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) 6281 { 6282 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, 6283 (__v16sf) __W, 6284 (__mmask16) __U); 6285 } 6286 6287 extern __inline __m512 6288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6289 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P) 6290 { 6291 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, 6292 (__v16sf) 6293 _mm512_setzero_ps (), 6294 (__mmask16) __U); 6295 } 6296 6297 extern __inline void 6298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6299 _mm512_storeu_ps (void *__P, 
__m512 __A) 6300 { 6301 *(__m512_u *)__P = __A; 6302 } 6303 6304 extern __inline void 6305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6306 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A) 6307 { 6308 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A, 6309 (__mmask16) __U); 6310 } 6311 6312 extern __inline __m128 6313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6314 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P) 6315 { 6316 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U); 6317 } 6318 6319 extern __inline __m128 6320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6321 _mm_maskz_load_ss (__mmask8 __U, const float *__P) 6322 { 6323 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (), 6324 __U); 6325 } 6326 6327 extern __inline __m128d 6328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6329 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P) 6330 { 6331 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U); 6332 } 6333 6334 extern __inline __m128d 6335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6336 _mm_maskz_load_sd (__mmask8 __U, const double *__P) 6337 { 6338 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (), 6339 __U); 6340 } 6341 6342 extern __inline __m128 6343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6344 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6345 { 6346 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B, 6347 (__v4sf) __W, __U); 6348 } 6349 6350 extern __inline __m128 6351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6352 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) 6353 { 6354 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B, 6355 (__v4sf) _mm_setzero_ps (), __U); 6356 } 6357 6358 
/* Masked scalar double moves (movesd builtin), mirroring the _ss forms.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
					       (__v2df) __W, __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
					       (__v2df) _mm_setzero_pd (),
					       __U);
}

/* Masked scalar stores to memory.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
{
  __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
}

/* Unaligned 512-bit integer loads/stores (64-bit element granularity for
   the epi64 masked forms, via the __m512i_u may-alias type otherwise).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_epi64 (void const *__P)
{
  return *(__m512i_u *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
						     (__v8di) __W,
						     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
  *(__m512i_u *) __P = (__m512i_u) __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
				     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_si512 (void const *__P)
{
  return *(__m512i_u *)__P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_epi32 (void const *__P)
{
  return *(__m512i_u *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
						     (__v16si) __W,
						     (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  *(__m512i_u *)__P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
  *(__m512i_u *) __P = (__m512i_u) __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
				     (__mmask16) __U);
}

/* vpermilvar* wrappers: per-lane permute of __A controlled by __C, with
   the usual plain/mask/maskz pass-through convention.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutevar_pd (__m512d __A, __m512i __C)
{
  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
							(__v8di) __C,
							(__v8df)
							_mm512_undefined_pd (),
							(__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
							(__v8di) __C,
							(__v8df) __W,
							(__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
							(__v8di) __C,
							(__v8df)
							_mm512_setzero_pd (),
							(__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutevar_ps (__m512 __A, __m512i __C)
{
  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
						       (__v16si) __C,
						       (__v16sf)
						       _mm512_undefined_ps (),
						       (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
						       (__v16si) __C,
						       (__v16sf) __W,
						       (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
						       (__v16si) __C,
						       (__v16sf)
						       _mm512_setzero_ps (),
						       (__mmask16) __U);
}

/* Two-source permutes.  Note the operand order: the vpermt2* builtins take
   the index vector __I first; the mask2_ variants use the vpermi2* builtins
   (index in the middle) so that masking preserves the index operand.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
						       /* idx */ ,
						       (__v8di) __A,
						       (__v8di) __B,
						       (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
				__m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
						       /* idx */ ,
						       (__v8di) __A,
						       (__v8di) __B,
						       (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
				 __mmask8 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
						       (__v8di) __I
						       /* idx */ ,
						       (__v8di) __B,
						       (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
				 __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
							/* idx */ ,
							(__v8di) __A,
							(__v8di) __B,
							(__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
						       /* idx */ ,
						       (__v16si) __A,
						       (__v16si) __B,
						       (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
				__m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
						       /* idx */ ,
						       (__v16si) __A,
						       (__v16si) __B,
						       (__mmask16) __U);
}

/* mask2_ variant: uses vpermi2vard (index operand second) instead.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
				 __mmask16 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
						       (__v16si) __I
						       /* idx */ ,
						       (__v16si) __B,
						       (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
				 __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
							/* idx */ ,
							(__v16si) __A,
							(__v16si) __B,
							(__mmask16) __U);
}

/* Floating-point two-source permutes; same operand conventions.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
							/* idx */ ,
							(__v8df) __A,
							(__v8df) __B,
							(__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
			     __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
							/* idx */ ,
							(__v8df) __A,
							(__v8df) __B,
							(__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
			      __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
							(__v8di) __I
							/* idx */ ,
							(__v8df) __B,
							(__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
			      __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
							 /* idx */ ,
							 (__v8df) __A,
							 (__v8df) __B,
							 (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
						       /* idx */ ,
						       (__v16sf) __A,
						       (__v16sf) __B,
						       (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
						       /* idx */ ,
						       (__v16sf) __A,
						       (__v16sf) __B,
						       (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
			      __m512 __B)
{
  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
						       (__v16si) __I
						       /* idx */ ,
						       (__v16sf) __B,
						       (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
			      __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
							/* idx */ ,
							(__v16sf) __A,
							(__v16sf) __B,
							(__mmask16) __U);
}

#ifdef __OPTIMIZE__
/* Immediate-controlled in-lane permutes (vpermilpd/vpermilps builtins).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permute_pd (__m512d __X, const int __C)
{
  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
						     (__v8df)
						     _mm512_undefined_pd (),
						     (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
{
  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
						     (__v8df) __W,
						     (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
{
  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permute_ps (__m512 __X, const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf) __W,
						    (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U);
}
#else
/* Macro forms for the non-optimizing build.  */
#define _mm512_permute_pd(X, C)                                         \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
					       (__v8df)(__m512d)_mm512_undefined_pd(),\
					       (__mmask8)(-1)))

#define _mm512_mask_permute_pd(W, U, X, C)                              \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
					       (__v8df)(__m512d)(W),    \
					       (__mmask8)(U)))

#define _mm512_maskz_permute_pd(U, X, C)                                \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
					       (__v8df)(__m512d)_mm512_setzero_pd(), \
					       (__mmask8)(U)))

#define _mm512_permute_ps(X, C)                                         \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
					      (__v16sf)(__m512)_mm512_undefined_ps(),\
					      (__mmask16)(-1)))

#define _mm512_mask_permute_ps(W, U, X, C)                              \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
					      (__v16sf)(__m512)(W),     \
					      (__mmask16)(U)))

#define _mm512_maskz_permute_ps(U, X, C)                                \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
					      (__v16sf)(__m512)_mm512_setzero_ps(), \
					      (__mmask16)(U)))
#endif

#ifdef __OPTIMIZE__
/* Immediate-controlled cross-lane permutes (permdi/permdf builtins).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex_epi64 (__m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) (-1));
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
			    __m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di) __W,
						  (__mmask8) __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __M);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex_pd (__m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df) __W,
						  (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}
#else
/* Macro forms for the non-optimizing build.  */
#define _mm512_permutex_pd(X, M)                                        \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
					    (__v8df)(__m512d)_mm512_undefined_pd(),\
					    (__mmask8)-1))

#define _mm512_mask_permutex_pd(W, U, X, M)                             \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
					    (__v8df)(__m512d)(W), (__mmask8)(U)))

#define _mm512_maskz_permutex_pd(U, X, M)                               \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
					    (__v8df)(__m512d)_mm512_setzero_pd(),\
					    (__mmask8)(U)))

#define _mm512_permutex_epi64(X, I)                                     \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X),       \
					    (int)(I),                   \
					    (__v8di)(__m512i)           \
					    (_mm512_undefined_epi32 ()),\
					    (__mmask8)(-1)))

#define _mm512_maskz_permutex_epi64(M, X, I)                            \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X),       \
					    (int)(I),                   \
					    (__v8di)(__m512i)           \
					    (_mm512_setzero_si512 ()),  \
					    (__mmask8)(M)))

#define _mm512_mask_permutex_epi64(W, M, X, I)                          \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X),       \
					    (int)(I),                   \
					    (__v8di)(__m512i)(W),       \
					    (__mmask8)(M)))
#endif

/* Variable cross-lane permutes (permvar* builtins).  Note the builtins
   take the data vector __Y first and the index vector __X second.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
						     (__v8di) __X,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
						     (__v8di) __X,
						     (__v8di)
						     _mm512_undefined_epi32 (),
						     (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
			       __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
						     (__v8di) __X,
						     (__v8di) __W,
						     __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
						     (__v16si) __X,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
						     (__v16si) __X,
						     (__v16si)
						     _mm512_undefined_epi32 (),
						     (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
			       __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
						     (__v16si) __X,
						     (__v16si) __W,
						     __M);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
						     (__v8di) __X,
						     (__v8df)
						     _mm512_undefined_pd (),
						     (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
						     (__v8di) __X,
						     (__v8df) __W,
						     (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
						     (__v8di) __X,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
						    (__v16si) __X,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
						    (__v16si) __X,
						    (__v16sf) __W,
						    (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
						    (__v16si) __X,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U);
}

#ifdef __OPTIMIZE__
/* Immediate shuffles of two 512-bit vectors.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
{
  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
						 (__v16sf) __V, __imm,
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
			__m512 __V, const int __imm)
{
  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
						 (__v16sf) __V, __imm,
						 (__v16sf) __W,
						 (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
{
  return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
						 (__v16sf) __V, __imm,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
{
  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
						  (__v8df) __V, __imm,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
			__m512d __V, const int __imm)
{
  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
						  (__v8df) __V, __imm,
						  (__v8df) __W,
						  (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
			 const int __imm)
{
  return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
						  (__v8df) __V, __imm,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}

/* fixupimm with explicit rounding control __R; the maskz_ form uses the
   dedicated _maskz builtin.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
			  const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
						      (__v8df) __B,
						      (__v8di) __C,
						      __imm,
						      (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
			       __m512i __C, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
						      (__v8df) __B,
						      (__v8di) __C,
						      __imm,
						      (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
				__m512i __C, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
						       (__v8df) __B,
						       (__v8di) __C,
						       __imm,
						       (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
			  const int __imm, const int __R)
{
  return (__m512)
__builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 7161 (__v16sf) __B, 7162 (__v16si) __C, 7163 __imm, 7164 (__mmask16) -1, __R); 7165 } 7166 7167 extern __inline __m512 7168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7169 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 7170 __m512i __C, const int __imm, const int __R) 7171 { 7172 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 7173 (__v16sf) __B, 7174 (__v16si) __C, 7175 __imm, 7176 (__mmask16) __U, __R); 7177 } 7178 7179 extern __inline __m512 7180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7181 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 7182 __m512i __C, const int __imm, const int __R) 7183 { 7184 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, 7185 (__v16sf) __B, 7186 (__v16si) __C, 7187 __imm, 7188 (__mmask16) __U, __R); 7189 } 7190 7191 extern __inline __m128d 7192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7193 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C, 7194 const int __imm, const int __R) 7195 { 7196 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 7197 (__v2df) __B, 7198 (__v2di) __C, __imm, 7199 (__mmask8) -1, __R); 7200 } 7201 7202 extern __inline __m128d 7203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7204 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B, 7205 __m128i __C, const int __imm, const int __R) 7206 { 7207 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 7208 (__v2df) __B, 7209 (__v2di) __C, __imm, 7210 (__mmask8) __U, __R); 7211 } 7212 7213 extern __inline __m128d 7214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7215 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 7216 __m128i __C, const int __imm, const int __R) 7217 { 7218 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, 7219 (__v2df) 
__B, 7220 (__v2di) __C, 7221 __imm, 7222 (__mmask8) __U, __R); 7223 } 7224 7225 extern __inline __m128 7226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7227 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C, 7228 const int __imm, const int __R) 7229 { 7230 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 7231 (__v4sf) __B, 7232 (__v4si) __C, __imm, 7233 (__mmask8) -1, __R); 7234 } 7235 7236 extern __inline __m128 7237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7238 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B, 7239 __m128i __C, const int __imm, const int __R) 7240 { 7241 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 7242 (__v4sf) __B, 7243 (__v4si) __C, __imm, 7244 (__mmask8) __U, __R); 7245 } 7246 7247 extern __inline __m128 7248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7249 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 7250 __m128i __C, const int __imm, const int __R) 7251 { 7252 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, 7253 (__v4sf) __B, 7254 (__v4si) __C, __imm, 7255 (__mmask8) __U, __R); 7256 } 7257 7258 #else 7259 #define _mm512_shuffle_pd(X, Y, C) \ 7260 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ 7261 (__v8df)(__m512d)(Y), (int)(C),\ 7262 (__v8df)(__m512d)_mm512_undefined_pd(),\ 7263 (__mmask8)-1)) 7264 7265 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \ 7266 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ 7267 (__v8df)(__m512d)(Y), (int)(C),\ 7268 (__v8df)(__m512d)(W),\ 7269 (__mmask8)(U))) 7270 7271 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \ 7272 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ 7273 (__v8df)(__m512d)(Y), (int)(C),\ 7274 (__v8df)(__m512d)_mm512_setzero_pd(),\ 7275 (__mmask8)(U))) 7276 7277 #define _mm512_shuffle_ps(X, Y, C) \ 7278 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ 7279 
(__v16sf)(__m512)(Y), (int)(C),\ 7280 (__v16sf)(__m512)_mm512_undefined_ps(),\ 7281 (__mmask16)-1)) 7282 7283 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \ 7284 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ 7285 (__v16sf)(__m512)(Y), (int)(C),\ 7286 (__v16sf)(__m512)(W),\ 7287 (__mmask16)(U))) 7288 7289 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \ 7290 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ 7291 (__v16sf)(__m512)(Y), (int)(C),\ 7292 (__v16sf)(__m512)_mm512_setzero_ps(),\ 7293 (__mmask16)(U))) 7294 7295 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \ 7296 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 7297 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 7298 (__mmask8)(-1), (R))) 7299 7300 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \ 7301 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 7302 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 7303 (__mmask8)(U), (R))) 7304 7305 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \ 7306 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \ 7307 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 7308 (__mmask8)(U), (R))) 7309 7310 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \ 7311 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 7312 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 7313 (__mmask16)(-1), (R))) 7314 7315 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \ 7316 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 7317 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 7318 (__mmask16)(U), (R))) 7319 7320 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \ 7321 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ 7322 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 7323 (__mmask16)(U), (R))) 7324 7325 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \ 7326 
((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 7327 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 7328 (__mmask8)(-1), (R))) 7329 7330 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \ 7331 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 7332 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 7333 (__mmask8)(U), (R))) 7334 7335 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \ 7336 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ 7337 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 7338 (__mmask8)(U), (R))) 7339 7340 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \ 7341 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 7342 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 7343 (__mmask8)(-1), (R))) 7344 7345 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \ 7346 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 7347 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 7348 (__mmask8)(U), (R))) 7349 7350 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \ 7351 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ 7352 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 7353 (__mmask8)(U), (R))) 7354 #endif 7355 7356 extern __inline __m512 7357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7358 _mm512_movehdup_ps (__m512 __A) 7359 { 7360 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 7361 (__v16sf) 7362 _mm512_undefined_ps (), 7363 (__mmask16) -1); 7364 } 7365 7366 extern __inline __m512 7367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7368 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 7369 { 7370 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 7371 (__v16sf) __W, 7372 (__mmask16) __U); 7373 } 7374 7375 extern __inline __m512 7376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7377 _mm512_maskz_movehdup_ps 
(__mmask16 __U, __m512 __A) 7378 { 7379 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 7380 (__v16sf) 7381 _mm512_setzero_ps (), 7382 (__mmask16) __U); 7383 } 7384 7385 extern __inline __m512 7386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7387 _mm512_moveldup_ps (__m512 __A) 7388 { 7389 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 7390 (__v16sf) 7391 _mm512_undefined_ps (), 7392 (__mmask16) -1); 7393 } 7394 7395 extern __inline __m512 7396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7397 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 7398 { 7399 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 7400 (__v16sf) __W, 7401 (__mmask16) __U); 7402 } 7403 7404 extern __inline __m512 7405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7406 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 7407 { 7408 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 7409 (__v16sf) 7410 _mm512_setzero_ps (), 7411 (__mmask16) __U); 7412 } 7413 7414 extern __inline __m512i 7415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7416 _mm512_or_si512 (__m512i __A, __m512i __B) 7417 { 7418 return (__m512i) ((__v16su) __A | (__v16su) __B); 7419 } 7420 7421 extern __inline __m512i 7422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7423 _mm512_or_epi32 (__m512i __A, __m512i __B) 7424 { 7425 return (__m512i) ((__v16su) __A | (__v16su) __B); 7426 } 7427 7428 extern __inline __m512i 7429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7430 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7431 { 7432 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 7433 (__v16si) __B, 7434 (__v16si) __W, 7435 (__mmask16) __U); 7436 } 7437 7438 extern __inline __m512i 7439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7440 _mm512_maskz_or_epi32 
(__mmask16 __U, __m512i __A, __m512i __B) 7441 { 7442 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 7443 (__v16si) __B, 7444 (__v16si) 7445 _mm512_setzero_si512 (), 7446 (__mmask16) __U); 7447 } 7448 7449 extern __inline __m512i 7450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7451 _mm512_or_epi64 (__m512i __A, __m512i __B) 7452 { 7453 return (__m512i) ((__v8du) __A | (__v8du) __B); 7454 } 7455 7456 extern __inline __m512i 7457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7458 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7459 { 7460 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, 7461 (__v8di) __B, 7462 (__v8di) __W, 7463 (__mmask8) __U); 7464 } 7465 7466 extern __inline __m512i 7467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7468 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7469 { 7470 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, 7471 (__v8di) __B, 7472 (__v8di) 7473 _mm512_setzero_si512 (), 7474 (__mmask8) __U); 7475 } 7476 7477 extern __inline __m512i 7478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7479 _mm512_xor_si512 (__m512i __A, __m512i __B) 7480 { 7481 return (__m512i) ((__v16su) __A ^ (__v16su) __B); 7482 } 7483 7484 extern __inline __m512i 7485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7486 _mm512_xor_epi32 (__m512i __A, __m512i __B) 7487 { 7488 return (__m512i) ((__v16su) __A ^ (__v16su) __B); 7489 } 7490 7491 extern __inline __m512i 7492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7493 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7494 { 7495 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, 7496 (__v16si) __B, 7497 (__v16si) __W, 7498 (__mmask16) __U); 7499 } 7500 7501 extern __inline __m512i 7502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7503 
/* Bitwise XOR of packed 32-bit ints; lanes with a clear mask bit in
   __U are zeroed.  */
_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}

/* Bitwise XOR of the full 512-bit vectors, expressed on the unsigned
   64-bit element type.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_xor_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A ^ (__v8du) __B);
}

/* Masked XOR of packed 64-bit ints; lanes with a clear mask bit keep
   the corresponding lane of __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Masked XOR of packed 64-bit ints; lanes with a clear mask bit are
   zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}

/* The rotate intrinsics need __B to be an immediate; without
   optimization the inline wrappers cannot guarantee that, so macro
   forms are provided in the #else branch below.  */
#ifdef __OPTIMIZE__
/* Rotate each 32-bit lane of __A left by __B bits.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rol_epi32 (__m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
						 (__v16si)
						 _mm512_undefined_epi32 (),
						 (__mmask16) -1);
}

/* Masked left-rotate; lanes with a clear mask bit keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* Masked left-rotate; lanes with a clear mask bit are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
{
  return (__m512i)
__builtin_ia32_prold512_mask ((__v16si) __A, __B, 7565 (__v16si) 7566 _mm512_setzero_si512 (), 7567 (__mmask16) __U); 7568 } 7569 7570 extern __inline __m512i 7571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7572 _mm512_ror_epi32 (__m512i __A, int __B) 7573 { 7574 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, 7575 (__v16si) 7576 _mm512_undefined_epi32 (), 7577 (__mmask16) -1); 7578 } 7579 7580 extern __inline __m512i 7581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7582 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B) 7583 { 7584 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, 7585 (__v16si) __W, 7586 (__mmask16) __U); 7587 } 7588 7589 extern __inline __m512i 7590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7591 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B) 7592 { 7593 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, 7594 (__v16si) 7595 _mm512_setzero_si512 (), 7596 (__mmask16) __U); 7597 } 7598 7599 extern __inline __m512i 7600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7601 _mm512_rol_epi64 (__m512i __A, const int __B) 7602 { 7603 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, 7604 (__v8di) 7605 _mm512_undefined_epi32 (), 7606 (__mmask8) -1); 7607 } 7608 7609 extern __inline __m512i 7610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7611 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B) 7612 { 7613 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, 7614 (__v8di) __W, 7615 (__mmask8) __U); 7616 } 7617 7618 extern __inline __m512i 7619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7620 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B) 7621 { 7622 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, 7623 (__v8di) 7624 _mm512_setzero_si512 (), 
7625 (__mmask8) __U); 7626 } 7627 7628 extern __inline __m512i 7629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7630 _mm512_ror_epi64 (__m512i __A, int __B) 7631 { 7632 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, 7633 (__v8di) 7634 _mm512_undefined_epi32 (), 7635 (__mmask8) -1); 7636 } 7637 7638 extern __inline __m512i 7639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7640 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B) 7641 { 7642 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, 7643 (__v8di) __W, 7644 (__mmask8) __U); 7645 } 7646 7647 extern __inline __m512i 7648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7649 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B) 7650 { 7651 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, 7652 (__v8di) 7653 _mm512_setzero_si512 (), 7654 (__mmask8) __U); 7655 } 7656 7657 #else 7658 #define _mm512_rol_epi32(A, B) \ 7659 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ 7660 (int)(B), \ 7661 (__v16si)_mm512_undefined_epi32 (), \ 7662 (__mmask16)(-1))) 7663 #define _mm512_mask_rol_epi32(W, U, A, B) \ 7664 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ 7665 (int)(B), \ 7666 (__v16si)(__m512i)(W), \ 7667 (__mmask16)(U))) 7668 #define _mm512_maskz_rol_epi32(U, A, B) \ 7669 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ 7670 (int)(B), \ 7671 (__v16si)_mm512_setzero_si512 (), \ 7672 (__mmask16)(U))) 7673 #define _mm512_ror_epi32(A, B) \ 7674 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ 7675 (int)(B), \ 7676 (__v16si)_mm512_undefined_epi32 (), \ 7677 (__mmask16)(-1))) 7678 #define _mm512_mask_ror_epi32(W, U, A, B) \ 7679 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ 7680 (int)(B), \ 7681 (__v16si)(__m512i)(W), \ 7682 (__mmask16)(U))) 7683 #define _mm512_maskz_ror_epi32(U, A, B) \ 7684 
((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ 7685 (int)(B), \ 7686 (__v16si)_mm512_setzero_si512 (), \ 7687 (__mmask16)(U))) 7688 #define _mm512_rol_epi64(A, B) \ 7689 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ 7690 (int)(B), \ 7691 (__v8di)_mm512_undefined_epi32 (), \ 7692 (__mmask8)(-1))) 7693 #define _mm512_mask_rol_epi64(W, U, A, B) \ 7694 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ 7695 (int)(B), \ 7696 (__v8di)(__m512i)(W), \ 7697 (__mmask8)(U))) 7698 #define _mm512_maskz_rol_epi64(U, A, B) \ 7699 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ 7700 (int)(B), \ 7701 (__v8di)_mm512_setzero_si512 (), \ 7702 (__mmask8)(U))) 7703 7704 #define _mm512_ror_epi64(A, B) \ 7705 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ 7706 (int)(B), \ 7707 (__v8di)_mm512_undefined_epi32 (), \ 7708 (__mmask8)(-1))) 7709 #define _mm512_mask_ror_epi64(W, U, A, B) \ 7710 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ 7711 (int)(B), \ 7712 (__v8di)(__m512i)(W), \ 7713 (__mmask8)(U))) 7714 #define _mm512_maskz_ror_epi64(U, A, B) \ 7715 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ 7716 (int)(B), \ 7717 (__v8di)_mm512_setzero_si512 (), \ 7718 (__mmask8)(U))) 7719 #endif 7720 7721 extern __inline __m512i 7722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7723 _mm512_and_si512 (__m512i __A, __m512i __B) 7724 { 7725 return (__m512i) ((__v16su) __A & (__v16su) __B); 7726 } 7727 7728 extern __inline __m512i 7729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7730 _mm512_and_epi32 (__m512i __A, __m512i __B) 7731 { 7732 return (__m512i) ((__v16su) __A & (__v16su) __B); 7733 } 7734 7735 extern __inline __m512i 7736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7737 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7738 { 7739 return (__m512i) __builtin_ia32_pandd512_mask 
((__v16si) __A, 7740 (__v16si) __B, 7741 (__v16si) __W, 7742 (__mmask16) __U); 7743 } 7744 7745 extern __inline __m512i 7746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7747 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7748 { 7749 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, 7750 (__v16si) __B, 7751 (__v16si) 7752 _mm512_setzero_si512 (), 7753 (__mmask16) __U); 7754 } 7755 7756 extern __inline __m512i 7757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7758 _mm512_and_epi64 (__m512i __A, __m512i __B) 7759 { 7760 return (__m512i) ((__v8du) __A & (__v8du) __B); 7761 } 7762 7763 extern __inline __m512i 7764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7765 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7766 { 7767 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, 7768 (__v8di) __B, 7769 (__v8di) __W, __U); 7770 } 7771 7772 extern __inline __m512i 7773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7774 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7775 { 7776 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, 7777 (__v8di) __B, 7778 (__v8di) 7779 _mm512_setzero_pd (), 7780 __U); 7781 } 7782 7783 extern __inline __m512i 7784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7785 _mm512_andnot_si512 (__m512i __A, __m512i __B) 7786 { 7787 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7788 (__v16si) __B, 7789 (__v16si) 7790 _mm512_undefined_epi32 (), 7791 (__mmask16) -1); 7792 } 7793 7794 extern __inline __m512i 7795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7796 _mm512_andnot_epi32 (__m512i __A, __m512i __B) 7797 { 7798 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7799 (__v16si) __B, 7800 (__v16si) 7801 _mm512_undefined_epi32 (), 7802 (__mmask16) -1); 7803 } 7804 7805 extern __inline __m512i 7806 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7807 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7808 { 7809 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7810 (__v16si) __B, 7811 (__v16si) __W, 7812 (__mmask16) __U); 7813 } 7814 7815 extern __inline __m512i 7816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7817 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7818 { 7819 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7820 (__v16si) __B, 7821 (__v16si) 7822 _mm512_setzero_si512 (), 7823 (__mmask16) __U); 7824 } 7825 7826 extern __inline __m512i 7827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7828 _mm512_andnot_epi64 (__m512i __A, __m512i __B) 7829 { 7830 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7831 (__v8di) __B, 7832 (__v8di) 7833 _mm512_undefined_epi32 (), 7834 (__mmask8) -1); 7835 } 7836 7837 extern __inline __m512i 7838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7839 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7840 { 7841 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7842 (__v8di) __B, 7843 (__v8di) __W, __U); 7844 } 7845 7846 extern __inline __m512i 7847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7848 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7849 { 7850 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7851 (__v8di) __B, 7852 (__v8di) 7853 _mm512_setzero_pd (), 7854 __U); 7855 } 7856 7857 extern __inline __mmask16 7858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7859 _mm512_test_epi32_mask (__m512i __A, __m512i __B) 7860 { 7861 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 7862 (__v16si) __B, 7863 (__mmask16) -1); 7864 } 7865 7866 extern __inline __mmask16 7867 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 7868 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 7869 { 7870 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 7871 (__v16si) __B, __U); 7872 } 7873 7874 extern __inline __mmask8 7875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7876 _mm512_test_epi64_mask (__m512i __A, __m512i __B) 7877 { 7878 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, 7879 (__v8di) __B, 7880 (__mmask8) -1); 7881 } 7882 7883 extern __inline __mmask8 7884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7885 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 7886 { 7887 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U); 7888 } 7889 7890 extern __inline __mmask16 7891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7892 _mm512_testn_epi32_mask (__m512i __A, __m512i __B) 7893 { 7894 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 7895 (__v16si) __B, 7896 (__mmask16) -1); 7897 } 7898 7899 extern __inline __mmask16 7900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7901 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 7902 { 7903 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 7904 (__v16si) __B, __U); 7905 } 7906 7907 extern __inline __mmask8 7908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7909 _mm512_testn_epi64_mask (__m512i __A, __m512i __B) 7910 { 7911 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 7912 (__v8di) __B, 7913 (__mmask8) -1); 7914 } 7915 7916 extern __inline __mmask8 7917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7918 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 7919 { 7920 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 7921 (__v8di) __B, __U); 7922 } 7923 7924 extern __inline __m512 7925 __attribute__ ((__gnu_inline__, __always_inline__, 
 __artificial__))
_mm512_abs_ps (__m512 __A)
{
  /* |x| for packed floats: clear the sign bit (bit 31) of every
     32-bit lane by ANDing with 0x7fffffff.  */
  return (__m512) _mm512_and_epi32 ((__m512i) __A,
				    _mm512_set1_epi32 (0x7fffffff));
}

/* Masked |x|; lanes with a clear mask bit keep __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
					 _mm512_set1_epi32 (0x7fffffff));
}

/* |x| for packed doubles: clear the sign bit (bit 63) of every
   64-bit lane.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_abs_pd (__m512d __A)
{
  return (__m512d) _mm512_and_epi64 ((__m512i) __A,
				     _mm512_set1_epi64 (0x7fffffffffffffffLL));
}

/* Masked |x| for doubles; lanes with a clear mask bit keep __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)
    _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
			   _mm512_set1_epi64 (0x7fffffffffffffffLL));
}

/* Interleave the high halves of each 128-bit block of __A and __B at
   32-bit granularity.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si)
						     _mm512_undefined_epi32 (),
						     (__mmask16) -1);
}

/* Masked variant; lanes with a clear mask bit keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			    __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si) __W,
						     (__mmask16) __U);
}

/* Masked variant; lanes with a clear mask bit are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
						     (__v16si) __B,
(__v16si) 7986 _mm512_setzero_si512 (), 7987 (__mmask16) __U); 7988 } 7989 7990 extern __inline __m512i 7991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7992 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B) 7993 { 7994 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 7995 (__v8di) __B, 7996 (__v8di) 7997 _mm512_undefined_epi32 (), 7998 (__mmask8) -1); 7999 } 8000 8001 extern __inline __m512i 8002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8003 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 8004 { 8005 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 8006 (__v8di) __B, 8007 (__v8di) __W, 8008 (__mmask8) __U); 8009 } 8010 8011 extern __inline __m512i 8012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8013 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 8014 { 8015 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 8016 (__v8di) __B, 8017 (__v8di) 8018 _mm512_setzero_si512 (), 8019 (__mmask8) __U); 8020 } 8021 8022 extern __inline __m512i 8023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8024 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B) 8025 { 8026 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 8027 (__v16si) __B, 8028 (__v16si) 8029 _mm512_undefined_epi32 (), 8030 (__mmask16) -1); 8031 } 8032 8033 extern __inline __m512i 8034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8035 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 8036 __m512i __B) 8037 { 8038 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 8039 (__v16si) __B, 8040 (__v16si) __W, 8041 (__mmask16) __U); 8042 } 8043 8044 extern __inline __m512i 8045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8046 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 8047 { 8048 return (__m512i) 
__builtin_ia32_punpckldq512_mask ((__v16si) __A, 8049 (__v16si) __B, 8050 (__v16si) 8051 _mm512_setzero_si512 (), 8052 (__mmask16) __U); 8053 } 8054 8055 extern __inline __m512i 8056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8057 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B) 8058 { 8059 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 8060 (__v8di) __B, 8061 (__v8di) 8062 _mm512_undefined_epi32 (), 8063 (__mmask8) -1); 8064 } 8065 8066 extern __inline __m512i 8067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8068 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 8069 { 8070 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 8071 (__v8di) __B, 8072 (__v8di) __W, 8073 (__mmask8) __U); 8074 } 8075 8076 extern __inline __m512i 8077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8078 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 8079 { 8080 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 8081 (__v8di) __B, 8082 (__v8di) 8083 _mm512_setzero_si512 (), 8084 (__mmask8) __U); 8085 } 8086 8087 #ifdef __x86_64__ 8088 #ifdef __OPTIMIZE__ 8089 extern __inline unsigned long long 8090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8091 _mm_cvt_roundss_u64 (__m128 __A, const int __R) 8092 { 8093 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R); 8094 } 8095 8096 extern __inline long long 8097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8098 _mm_cvt_roundss_si64 (__m128 __A, const int __R) 8099 { 8100 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R); 8101 } 8102 8103 extern __inline long long 8104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8105 _mm_cvt_roundss_i64 (__m128 __A, const int __R) 8106 { 8107 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R); 8108 } 8109 8110 extern 
__inline unsigned long long 8111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8112 _mm_cvtt_roundss_u64 (__m128 __A, const int __R) 8113 { 8114 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R); 8115 } 8116 8117 extern __inline long long 8118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8119 _mm_cvtt_roundss_i64 (__m128 __A, const int __R) 8120 { 8121 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R); 8122 } 8123 8124 extern __inline long long 8125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8126 _mm_cvtt_roundss_si64 (__m128 __A, const int __R) 8127 { 8128 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R); 8129 } 8130 #else 8131 #define _mm_cvt_roundss_u64(A, B) \ 8132 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B)) 8133 8134 #define _mm_cvt_roundss_si64(A, B) \ 8135 ((long long)__builtin_ia32_vcvtss2si64(A, B)) 8136 8137 #define _mm_cvt_roundss_i64(A, B) \ 8138 ((long long)__builtin_ia32_vcvtss2si64(A, B)) 8139 8140 #define _mm_cvtt_roundss_u64(A, B) \ 8141 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B)) 8142 8143 #define _mm_cvtt_roundss_i64(A, B) \ 8144 ((long long)__builtin_ia32_vcvttss2si64(A, B)) 8145 8146 #define _mm_cvtt_roundss_si64(A, B) \ 8147 ((long long)__builtin_ia32_vcvttss2si64(A, B)) 8148 #endif 8149 #endif 8150 8151 #ifdef __OPTIMIZE__ 8152 extern __inline unsigned 8153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8154 _mm_cvt_roundss_u32 (__m128 __A, const int __R) 8155 { 8156 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R); 8157 } 8158 8159 extern __inline int 8160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8161 _mm_cvt_roundss_si32 (__m128 __A, const int __R) 8162 { 8163 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R); 8164 } 8165 8166 extern __inline int 8167 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 8168 _mm_cvt_roundss_i32 (__m128 __A, const int __R) 8169 { 8170 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R); 8171 } 8172 8173 extern __inline unsigned 8174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8175 _mm_cvtt_roundss_u32 (__m128 __A, const int __R) 8176 { 8177 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R); 8178 } 8179 8180 extern __inline int 8181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8182 _mm_cvtt_roundss_i32 (__m128 __A, const int __R) 8183 { 8184 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); 8185 } 8186 8187 extern __inline int 8188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8189 _mm_cvtt_roundss_si32 (__m128 __A, const int __R) 8190 { 8191 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); 8192 } 8193 #else 8194 #define _mm_cvt_roundss_u32(A, B) \ 8195 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B)) 8196 8197 #define _mm_cvt_roundss_si32(A, B) \ 8198 ((int)__builtin_ia32_vcvtss2si32(A, B)) 8199 8200 #define _mm_cvt_roundss_i32(A, B) \ 8201 ((int)__builtin_ia32_vcvtss2si32(A, B)) 8202 8203 #define _mm_cvtt_roundss_u32(A, B) \ 8204 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B)) 8205 8206 #define _mm_cvtt_roundss_si32(A, B) \ 8207 ((int)__builtin_ia32_vcvttss2si32(A, B)) 8208 8209 #define _mm_cvtt_roundss_i32(A, B) \ 8210 ((int)__builtin_ia32_vcvttss2si32(A, B)) 8211 #endif 8212 8213 #ifdef __x86_64__ 8214 #ifdef __OPTIMIZE__ 8215 extern __inline unsigned long long 8216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8217 _mm_cvt_roundsd_u64 (__m128d __A, const int __R) 8218 { 8219 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R); 8220 } 8221 8222 extern __inline long long 8223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8224 _mm_cvt_roundsd_si64 (__m128d __A, const int __R) 8225 { 8226 return (long long) __builtin_ia32_vcvtsd2si64 
((__v2df) __A, __R); 8227 } 8228 8229 extern __inline long long 8230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8231 _mm_cvt_roundsd_i64 (__m128d __A, const int __R) 8232 { 8233 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R); 8234 } 8235 8236 extern __inline unsigned long long 8237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8238 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R) 8239 { 8240 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R); 8241 } 8242 8243 extern __inline long long 8244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8245 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R) 8246 { 8247 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); 8248 } 8249 8250 extern __inline long long 8251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8252 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R) 8253 { 8254 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); 8255 } 8256 #else 8257 #define _mm_cvt_roundsd_u64(A, B) \ 8258 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B)) 8259 8260 #define _mm_cvt_roundsd_si64(A, B) \ 8261 ((long long)__builtin_ia32_vcvtsd2si64(A, B)) 8262 8263 #define _mm_cvt_roundsd_i64(A, B) \ 8264 ((long long)__builtin_ia32_vcvtsd2si64(A, B)) 8265 8266 #define _mm_cvtt_roundsd_u64(A, B) \ 8267 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B)) 8268 8269 #define _mm_cvtt_roundsd_si64(A, B) \ 8270 ((long long)__builtin_ia32_vcvttsd2si64(A, B)) 8271 8272 #define _mm_cvtt_roundsd_i64(A, B) \ 8273 ((long long)__builtin_ia32_vcvttsd2si64(A, B)) 8274 #endif 8275 #endif 8276 8277 #ifdef __OPTIMIZE__ 8278 extern __inline unsigned 8279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8280 _mm_cvt_roundsd_u32 (__m128d __A, const int __R) 8281 { 8282 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R); 8283 } 8284 8285 extern __inline int 8286 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8287 _mm_cvt_roundsd_si32 (__m128d __A, const int __R) 8288 { 8289 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); 8290 } 8291 8292 extern __inline int 8293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8294 _mm_cvt_roundsd_i32 (__m128d __A, const int __R) 8295 { 8296 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); 8297 } 8298 8299 extern __inline unsigned 8300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8301 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R) 8302 { 8303 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R); 8304 } 8305 8306 extern __inline int 8307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8308 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R) 8309 { 8310 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); 8311 } 8312 8313 extern __inline int 8314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8315 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R) 8316 { 8317 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); 8318 } 8319 #else 8320 #define _mm_cvt_roundsd_u32(A, B) \ 8321 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B)) 8322 8323 #define _mm_cvt_roundsd_si32(A, B) \ 8324 ((int)__builtin_ia32_vcvtsd2si32(A, B)) 8325 8326 #define _mm_cvt_roundsd_i32(A, B) \ 8327 ((int)__builtin_ia32_vcvtsd2si32(A, B)) 8328 8329 #define _mm_cvtt_roundsd_u32(A, B) \ 8330 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B)) 8331 8332 #define _mm_cvtt_roundsd_si32(A, B) \ 8333 ((int)__builtin_ia32_vcvttsd2si32(A, B)) 8334 8335 #define _mm_cvtt_roundsd_i32(A, B) \ 8336 ((int)__builtin_ia32_vcvttsd2si32(A, B)) 8337 #endif 8338 8339 extern __inline __m512d 8340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8341 _mm512_movedup_pd (__m512d __A) 8342 { 8343 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 8344 (__v8df) 8345 
_mm512_undefined_pd (), 8346 (__mmask8) -1); 8347 } 8348 8349 extern __inline __m512d 8350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8351 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) 8352 { 8353 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 8354 (__v8df) __W, 8355 (__mmask8) __U); 8356 } 8357 8358 extern __inline __m512d 8359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8360 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) 8361 { 8362 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 8363 (__v8df) 8364 _mm512_setzero_pd (), 8365 (__mmask8) __U); 8366 } 8367 8368 extern __inline __m512d 8369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8370 _mm512_unpacklo_pd (__m512d __A, __m512d __B) 8371 { 8372 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 8373 (__v8df) __B, 8374 (__v8df) 8375 _mm512_undefined_pd (), 8376 (__mmask8) -1); 8377 } 8378 8379 extern __inline __m512d 8380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8381 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 8382 { 8383 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 8384 (__v8df) __B, 8385 (__v8df) __W, 8386 (__mmask8) __U); 8387 } 8388 8389 extern __inline __m512d 8390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8391 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) 8392 { 8393 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 8394 (__v8df) __B, 8395 (__v8df) 8396 _mm512_setzero_pd (), 8397 (__mmask8) __U); 8398 } 8399 8400 extern __inline __m512d 8401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8402 _mm512_unpackhi_pd (__m512d __A, __m512d __B) 8403 { 8404 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 8405 (__v8df) __B, 8406 (__v8df) 8407 _mm512_undefined_pd (), 8408 (__mmask8) -1); 8409 } 8410 8411 
extern __inline __m512d 8412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8413 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 8414 { 8415 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 8416 (__v8df) __B, 8417 (__v8df) __W, 8418 (__mmask8) __U); 8419 } 8420 8421 extern __inline __m512d 8422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8423 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B) 8424 { 8425 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 8426 (__v8df) __B, 8427 (__v8df) 8428 _mm512_setzero_pd (), 8429 (__mmask8) __U); 8430 } 8431 8432 extern __inline __m512 8433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8434 _mm512_unpackhi_ps (__m512 __A, __m512 __B) 8435 { 8436 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 8437 (__v16sf) __B, 8438 (__v16sf) 8439 _mm512_undefined_ps (), 8440 (__mmask16) -1); 8441 } 8442 8443 extern __inline __m512 8444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8445 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 8446 { 8447 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 8448 (__v16sf) __B, 8449 (__v16sf) __W, 8450 (__mmask16) __U); 8451 } 8452 8453 extern __inline __m512 8454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8455 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) 8456 { 8457 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 8458 (__v16sf) __B, 8459 (__v16sf) 8460 _mm512_setzero_ps (), 8461 (__mmask16) __U); 8462 } 8463 8464 #ifdef __OPTIMIZE__ 8465 extern __inline __m512d 8466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8467 _mm512_cvt_roundps_pd (__m256 __A, const int __R) 8468 { 8469 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 8470 (__v8df) 8471 _mm512_undefined_pd (), 8472 (__mmask8) -1, __R); 
8473 } 8474 8475 extern __inline __m512d 8476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8477 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A, 8478 const int __R) 8479 { 8480 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 8481 (__v8df) __W, 8482 (__mmask8) __U, __R); 8483 } 8484 8485 extern __inline __m512d 8486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8487 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R) 8488 { 8489 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 8490 (__v8df) 8491 _mm512_setzero_pd (), 8492 (__mmask8) __U, __R); 8493 } 8494 8495 extern __inline __m512 8496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8497 _mm512_cvt_roundph_ps (__m256i __A, const int __R) 8498 { 8499 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 8500 (__v16sf) 8501 _mm512_undefined_ps (), 8502 (__mmask16) -1, __R); 8503 } 8504 8505 extern __inline __m512 8506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8507 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A, 8508 const int __R) 8509 { 8510 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 8511 (__v16sf) __W, 8512 (__mmask16) __U, __R); 8513 } 8514 8515 extern __inline __m512 8516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8517 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R) 8518 { 8519 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 8520 (__v16sf) 8521 _mm512_setzero_ps (), 8522 (__mmask16) __U, __R); 8523 } 8524 8525 extern __inline __m256i 8526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8527 _mm512_cvt_roundps_ph (__m512 __A, const int __I) 8528 { 8529 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8530 __I, 8531 (__v16hi) 8532 _mm256_undefined_si256 (), 8533 -1); 8534 } 8535 8536 extern __inline 
__m256i 8537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8538 _mm512_cvtps_ph (__m512 __A, const int __I) 8539 { 8540 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8541 __I, 8542 (__v16hi) 8543 _mm256_undefined_si256 (), 8544 -1); 8545 } 8546 8547 extern __inline __m256i 8548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8549 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A, 8550 const int __I) 8551 { 8552 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8553 __I, 8554 (__v16hi) __U, 8555 (__mmask16) __W); 8556 } 8557 8558 extern __inline __m256i 8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8560 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I) 8561 { 8562 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8563 __I, 8564 (__v16hi) __U, 8565 (__mmask16) __W); 8566 } 8567 8568 extern __inline __m256i 8569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8570 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I) 8571 { 8572 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8573 __I, 8574 (__v16hi) 8575 _mm256_setzero_si256 (), 8576 (__mmask16) __W); 8577 } 8578 8579 extern __inline __m256i 8580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8581 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I) 8582 { 8583 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8584 __I, 8585 (__v16hi) 8586 _mm256_setzero_si256 (), 8587 (__mmask16) __W); 8588 } 8589 #else 8590 #define _mm512_cvt_roundps_pd(A, B) \ 8591 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B) 8592 8593 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \ 8594 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B) 8595 8596 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \ 8597 
(__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B) 8598 8599 #define _mm512_cvt_roundph_ps(A, B) \ 8600 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B) 8601 8602 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \ 8603 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B) 8604 8605 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \ 8606 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B) 8607 8608 #define _mm512_cvt_roundps_ph(A, I) \ 8609 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\ 8610 (__v16hi)_mm256_undefined_si256 (), -1)) 8611 #define _mm512_cvtps_ph(A, I) \ 8612 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\ 8613 (__v16hi)_mm256_undefined_si256 (), -1)) 8614 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ 8615 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\ 8616 (__v16hi)(__m256i)(U), (__mmask16) (W))) 8617 #define _mm512_mask_cvtps_ph(U, W, A, I) \ 8618 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\ 8619 (__v16hi)(__m256i)(U), (__mmask16) (W))) 8620 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \ 8621 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\ 8622 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W))) 8623 #define _mm512_maskz_cvtps_ph(W, A, I) \ 8624 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\ 8625 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W))) 8626 #endif 8627 8628 #ifdef __OPTIMIZE__ 8629 extern __inline __m256 8630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8631 _mm512_cvt_roundpd_ps (__m512d __A, const int __R) 8632 { 8633 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 8634 (__v8sf) 8635 _mm256_undefined_ps (), 8636 (__mmask8) -1, __R); 8637 } 8638 8639 extern __inline __m256 8640 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8641 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A, 8642 const int __R) 8643 { 8644 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 8645 (__v8sf) __W, 8646 (__mmask8) __U, __R); 8647 } 8648 8649 extern __inline __m256 8650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8651 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R) 8652 { 8653 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 8654 (__v8sf) 8655 _mm256_setzero_ps (), 8656 (__mmask8) __U, __R); 8657 } 8658 8659 extern __inline __m128 8660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8661 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R) 8662 { 8663 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A, 8664 (__v2df) __B, 8665 __R); 8666 } 8667 8668 extern __inline __m128d 8669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8670 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R) 8671 { 8672 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A, 8673 (__v4sf) __B, 8674 __R); 8675 } 8676 #else 8677 #define _mm512_cvt_roundpd_ps(A, B) \ 8678 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B) 8679 8680 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \ 8681 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B) 8682 8683 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \ 8684 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B) 8685 8686 #define _mm_cvt_roundsd_ss(A, B, C) \ 8687 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C) 8688 8689 #define _mm_cvt_roundss_sd(A, B, C) \ 8690 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C) 8691 #endif 8692 8693 extern __inline void 8694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8695 _mm512_stream_si512 (__m512i * __P, __m512i __A) 8696 { 8697 __builtin_ia32_movntdq512 
((__v8di *) __P, (__v8di) __A); 8698 } 8699 8700 extern __inline void 8701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8702 _mm512_stream_ps (float *__P, __m512 __A) 8703 { 8704 __builtin_ia32_movntps512 (__P, (__v16sf) __A); 8705 } 8706 8707 extern __inline void 8708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8709 _mm512_stream_pd (double *__P, __m512d __A) 8710 { 8711 __builtin_ia32_movntpd512 (__P, (__v8df) __A); 8712 } 8713 8714 extern __inline __m512i 8715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8716 _mm512_stream_load_si512 (void *__P) 8717 { 8718 return __builtin_ia32_movntdqa512 ((__v8di *)__P); 8719 } 8720 8721 /* Constants for mantissa extraction */ 8722 typedef enum 8723 { 8724 _MM_MANT_NORM_1_2, /* interval [1, 2) */ 8725 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ 8726 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ 8727 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ 8728 } _MM_MANTISSA_NORM_ENUM; 8729 8730 typedef enum 8731 { 8732 _MM_MANT_SIGN_src, /* sign = sign(SRC) */ 8733 _MM_MANT_SIGN_zero, /* sign = 0 */ 8734 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ 8735 } _MM_MANTISSA_SIGN_ENUM; 8736 8737 #ifdef __OPTIMIZE__ 8738 extern __inline __m128 8739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8740 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R) 8741 { 8742 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A, 8743 (__v4sf) __B, 8744 __R); 8745 } 8746 8747 extern __inline __m128 8748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8749 _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 8750 __m128 __B, const int __R) 8751 { 8752 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A, 8753 (__v4sf) __B, 8754 (__v4sf) __W, 8755 (__mmask8) __U, __R); 8756 } 8757 8758 extern __inline __m128 8759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8760 
_mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 8761 const int __R) 8762 { 8763 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A, 8764 (__v4sf) __B, 8765 (__v4sf) 8766 _mm_setzero_ps (), 8767 (__mmask8) __U, __R); 8768 } 8769 8770 extern __inline __m128d 8771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8772 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R) 8773 { 8774 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A, 8775 (__v2df) __B, 8776 __R); 8777 } 8778 8779 extern __inline __m128d 8780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8781 _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 8782 __m128d __B, const int __R) 8783 { 8784 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A, 8785 (__v2df) __B, 8786 (__v2df) __W, 8787 (__mmask8) __U, __R); 8788 } 8789 8790 extern __inline __m128d 8791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8792 _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 8793 const int __R) 8794 { 8795 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A, 8796 (__v2df) __B, 8797 (__v2df) 8798 _mm_setzero_pd (), 8799 (__mmask8) __U, __R); 8800 } 8801 8802 extern __inline __m512 8803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8804 _mm512_getexp_round_ps (__m512 __A, const int __R) 8805 { 8806 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8807 (__v16sf) 8808 _mm512_undefined_ps (), 8809 (__mmask16) -1, __R); 8810 } 8811 8812 extern __inline __m512 8813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8814 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 8815 const int __R) 8816 { 8817 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8818 (__v16sf) __W, 8819 (__mmask16) __U, __R); 8820 } 8821 8822 extern __inline __m512 8823 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 8824 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R) 8825 { 8826 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8827 (__v16sf) 8828 _mm512_setzero_ps (), 8829 (__mmask16) __U, __R); 8830 } 8831 8832 extern __inline __m512d 8833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8834 _mm512_getexp_round_pd (__m512d __A, const int __R) 8835 { 8836 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8837 (__v8df) 8838 _mm512_undefined_pd (), 8839 (__mmask8) -1, __R); 8840 } 8841 8842 extern __inline __m512d 8843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8844 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 8845 const int __R) 8846 { 8847 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8848 (__v8df) __W, 8849 (__mmask8) __U, __R); 8850 } 8851 8852 extern __inline __m512d 8853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8854 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R) 8855 { 8856 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8857 (__v8df) 8858 _mm512_setzero_pd (), 8859 (__mmask8) __U, __R); 8860 } 8861 8862 extern __inline __m512d 8863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8864 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, 8865 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8866 { 8867 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8868 (__C << 2) | __B, 8869 _mm512_undefined_pd (), 8870 (__mmask8) -1, __R); 8871 } 8872 8873 extern __inline __m512d 8874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8875 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 8876 _MM_MANTISSA_NORM_ENUM __B, 8877 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8878 { 8879 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8880 (__C << 2) | __B, 
8881 (__v8df) __W, __U, 8882 __R); 8883 } 8884 8885 extern __inline __m512d 8886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8887 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A, 8888 _MM_MANTISSA_NORM_ENUM __B, 8889 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8890 { 8891 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8892 (__C << 2) | __B, 8893 (__v8df) 8894 _mm512_setzero_pd (), 8895 __U, __R); 8896 } 8897 8898 extern __inline __m512 8899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8900 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, 8901 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8902 { 8903 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8904 (__C << 2) | __B, 8905 _mm512_undefined_ps (), 8906 (__mmask16) -1, __R); 8907 } 8908 8909 extern __inline __m512 8910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8911 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 8912 _MM_MANTISSA_NORM_ENUM __B, 8913 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8914 { 8915 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8916 (__C << 2) | __B, 8917 (__v16sf) __W, __U, 8918 __R); 8919 } 8920 8921 extern __inline __m512 8922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8923 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A, 8924 _MM_MANTISSA_NORM_ENUM __B, 8925 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8926 { 8927 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8928 (__C << 2) | __B, 8929 (__v16sf) 8930 _mm512_setzero_ps (), 8931 __U, __R); 8932 } 8933 8934 extern __inline __m128d 8935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8936 _mm_getmant_round_sd (__m128d __A, __m128d __B, 8937 _MM_MANTISSA_NORM_ENUM __C, 8938 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8939 { 8940 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A, 8941 (__v2df) __B, 
8942 (__D << 2) | __C, 8943 __R); 8944 } 8945 8946 extern __inline __m128d 8947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8948 _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 8949 __m128d __B, _MM_MANTISSA_NORM_ENUM __C, 8950 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8951 { 8952 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A, 8953 (__v2df) __B, 8954 (__D << 2) | __C, 8955 (__v2df) __W, 8956 __U, __R); 8957 } 8958 8959 extern __inline __m128d 8960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8961 _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 8962 _MM_MANTISSA_NORM_ENUM __C, 8963 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8964 { 8965 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A, 8966 (__v2df) __B, 8967 (__D << 2) | __C, 8968 (__v2df) 8969 _mm_setzero_pd(), 8970 __U, __R); 8971 } 8972 8973 extern __inline __m128 8974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8975 _mm_getmant_round_ss (__m128 __A, __m128 __B, 8976 _MM_MANTISSA_NORM_ENUM __C, 8977 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8978 { 8979 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A, 8980 (__v4sf) __B, 8981 (__D << 2) | __C, 8982 __R); 8983 } 8984 8985 extern __inline __m128 8986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8987 _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 8988 __m128 __B, _MM_MANTISSA_NORM_ENUM __C, 8989 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8990 { 8991 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A, 8992 (__v4sf) __B, 8993 (__D << 2) | __C, 8994 (__v4sf) __W, 8995 __U, __R); 8996 } 8997 8998 extern __inline __m128 8999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9000 _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 9001 _MM_MANTISSA_NORM_ENUM __C, 9002 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 9003 { 9004 
return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A, 9005 (__v4sf) __B, 9006 (__D << 2) | __C, 9007 (__v4sf) 9008 _mm_setzero_ps(), 9009 __U, __R); 9010 } 9011 9012 #else 9013 #define _mm512_getmant_round_pd(X, B, C, R) \ 9014 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 9015 (int)(((C)<<2) | (B)), \ 9016 (__v8df)(__m512d)_mm512_undefined_pd(), \ 9017 (__mmask8)-1,\ 9018 (R))) 9019 9020 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \ 9021 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 9022 (int)(((C)<<2) | (B)), \ 9023 (__v8df)(__m512d)(W), \ 9024 (__mmask8)(U),\ 9025 (R))) 9026 9027 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \ 9028 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 9029 (int)(((C)<<2) | (B)), \ 9030 (__v8df)(__m512d)_mm512_setzero_pd(), \ 9031 (__mmask8)(U),\ 9032 (R))) 9033 #define _mm512_getmant_round_ps(X, B, C, R) \ 9034 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 9035 (int)(((C)<<2) | (B)), \ 9036 (__v16sf)(__m512)_mm512_undefined_ps(), \ 9037 (__mmask16)-1,\ 9038 (R))) 9039 9040 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \ 9041 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 9042 (int)(((C)<<2) | (B)), \ 9043 (__v16sf)(__m512)(W), \ 9044 (__mmask16)(U),\ 9045 (R))) 9046 9047 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \ 9048 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 9049 (int)(((C)<<2) | (B)), \ 9050 (__v16sf)(__m512)_mm512_setzero_ps(), \ 9051 (__mmask16)(U),\ 9052 (R))) 9053 #define _mm_getmant_round_sd(X, Y, C, D, R) \ 9054 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \ 9055 (__v2df)(__m128d)(Y), \ 9056 (int)(((D)<<2) | (C)), \ 9057 (R))) 9058 9059 #define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \ 9060 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \ 9061 (__v2df)(__m128d)(Y), \ 9062 (int)(((D)<<2) | (C)), \ 9063 
(__v2df)(__m128d)(W), \ 9064 (__mmask8)(U),\ 9065 (R))) 9066 9067 #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \ 9068 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \ 9069 (__v2df)(__m128d)(Y), \ 9070 (int)(((D)<<2) | (C)), \ 9071 (__v2df)(__m128d)_mm_setzero_pd(), \ 9072 (__mmask8)(U),\ 9073 (R))) 9074 9075 #define _mm_getmant_round_ss(X, Y, C, D, R) \ 9076 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \ 9077 (__v4sf)(__m128)(Y), \ 9078 (int)(((D)<<2) | (C)), \ 9079 (R))) 9080 9081 #define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \ 9082 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \ 9083 (__v4sf)(__m128)(Y), \ 9084 (int)(((D)<<2) | (C)), \ 9085 (__v4sf)(__m128)(W), \ 9086 (__mmask8)(U),\ 9087 (R))) 9088 9089 #define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \ 9090 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \ 9091 (__v4sf)(__m128)(Y), \ 9092 (int)(((D)<<2) | (C)), \ 9093 (__v4sf)(__m128)_mm_setzero_ps(), \ 9094 (__mmask8)(U),\ 9095 (R))) 9096 9097 #define _mm_getexp_round_ss(A, B, R) \ 9098 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R)) 9099 9100 #define _mm_mask_getexp_round_ss(W, U, A, B, C) \ 9101 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C) 9102 9103 #define _mm_maskz_getexp_round_ss(U, A, B, C) \ 9104 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 9105 9106 #define _mm_getexp_round_sd(A, B, R) \ 9107 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R)) 9108 9109 #define _mm_mask_getexp_round_sd(W, U, A, B, C) \ 9110 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C) 9111 9112 #define _mm_maskz_getexp_round_sd(U, A, B, C) \ 9113 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 9114 9115 9116 #define _mm512_getexp_round_ps(A, R) \ 9117 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 
					   (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
					   (__v16sf)(__m512)(W), (__mmask16)(U), R))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
					   (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))

#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
					    (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
					    (__v8df)(__m512d)(W), (__mmask8)(U), R))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
					    (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
#endif

#ifdef __OPTIMIZE__
/* Roundscale with explicit rounding-mode operand __R.  __imm is the
   rndscale immediate (see the _MM_FROUND_FLOOR/_MM_FROUND_CEIL callers
   of the same builtins further below).  Plain, write-masked and
   zero-masked vector forms, then the scalar ss/sd forms.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  -1, __R);
}

/* Elements not selected by __B are taken from __A (the write-mask
   source); __C is the operand being rounded.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
				 const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
						  (__v16sf) __A,
						  (__mmask16) __B, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
				  const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
						  __imm,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __A, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
						   (__v8df)
						   _mm512_undefined_pd (),
						   -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
				 __m512d __C, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
						   (__v8df) __A,
						   (__mmask8) __B, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
				  const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
						   __imm,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __A, __R);
}

/* Scalar forms route through the *_mask_round builtins even for the
   unmasked intrinsic, with an all-ones mask and a zero merge source.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
			 const int __R)
{
  return (__m128)
    __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
					  (__v4sf) __B, __imm,
					  (__v4sf)
					  _mm_setzero_ps (),
					  (__mmask8) -1,
					  __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
			      __m128 __D, const int __imm, const int __R)
{
  return (__m128)
    __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
					  (__v4sf) __D, __imm,
					  (__v4sf) __A,
					  (__mmask8) __B,
					  __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__,
		__artificial__))
_mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
			       const int __imm, const int __R)
{
  return (__m128)
    __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
					  (__v4sf) __C, __imm,
					  (__v4sf)
					  _mm_setzero_ps (),
					  (__mmask8) __A,
					  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
			 const int __R)
{
  return (__m128d)
    __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
					  (__v2df) __B, __imm,
					  (__v2df)
					  _mm_setzero_pd (),
					  (__mmask8) -1,
					  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
			      __m128d __D, const int __imm, const int __R)
{
  return (__m128d)
    __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
					  (__v2df) __D, __imm,
					  (__v2df) __A,
					  (__mmask8) __B,
					  __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
			       const int __imm, const int __R)
{
  return (__m128d)
    __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
					  (__v2df) __C, __imm,
					  (__v2df)
					  _mm_setzero_pd (),
					  (__mmask8) __A,
					  __R);
}

#else
/* Macro forms of the roundscale intrinsics for non-optimizing builds,
   keeping the immediate and rounding-mode operands literal.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
    (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R)			\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
					    (int)(D),			\
					    (__v16sf)(__m512)(A),	\
					    (__mmask16)(B), R))
#define _mm512_maskz_roundscale_round_ps(A, B, C, R)			\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
					    (int)(C),			\
					    (__v16sf)_mm512_setzero_ps(),\
					    (__mmask16)(A), R))
#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
    (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R)			\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
					     (int)(D),			\
					     (__v8df)(__m512d)(A),	\
					     (__mmask8)(B), R))
#define _mm512_maskz_roundscale_round_pd(A, B, C, R)			\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
					     (int)(C),			\
					     (__v8df)_mm512_setzero_pd(),\
					     (__mmask8)(A), R))
#define _mm_roundscale_round_ss(A, B, I, R)				\
  ((__m128)								\
   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),		\
					 (__v4sf) (__m128) (B),		\
					 (int) (I),			\
					 (__v4sf) _mm_setzero_ps (),	\
					 (__mmask8) (-1),		\
					 (int) (R)))
#define _mm_mask_roundscale_round_ss(A, U, B, C, I, R)			\
  ((__m128)								\
   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B),		\
					 (__v4sf) (__m128) (C),		\
					 (int) (I),			\
					 (__v4sf) (__m128) (A),		\
					 (__mmask8) (U),		\
					 (int) (R)))
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R)			\
  ((__m128)								\
   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),		\
					 (__v4sf) (__m128) (B),		\
					 (int) (I),			\
					 (__v4sf) _mm_setzero_ps (),	\
					 (__mmask8) (U),		\
					 (int) (R)))
#define _mm_roundscale_round_sd(A, B, I, R)				\
  ((__m128d)								\
   __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),	\
					 (__v2df) (__m128d) (B),	\
					 (int) (I),			\
					 (__v2df) _mm_setzero_pd (),	\
					 (__mmask8) (-1),		\
					 (int) (R)))
#define _mm_mask_roundscale_round_sd(A, U, B, C, I, R)			\
  ((__m128d)								\
   __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B),	\
					 (__v2df) (__m128d) (C),	\
					 (int) (I),			\
					 (__v2df) (__m128d) (A),	\
					 (__mmask8) (U),		\
					 (int) (R)))
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R)			\
  ((__m128d)								\
   __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A),	\
					 (__v2df) (__m128d) (B),	\
					 (int) (I),			\
					 (__v2df) _mm_setzero_pd (),	\
					 (__mmask8) (U),		\
					 (int) (R)))
#endif

/* Floor/ceil are roundscale with the _MM_FROUND_FLOOR/_MM_FROUND_CEIL
   immediate and the current rounding direction; the unmasked forms pass
   __A itself as the (ignored, mask is all-ones) merge source.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_floor_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
						  _MM_FROUND_FLOOR,
						  (__v16sf) __A, -1,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_floor_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
						   _MM_FROUND_FLOOR,
						   (__v8df) __A, -1,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ceil_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
						  _MM_FROUND_CEIL,
						  (__v16sf) __A, -1,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ceil_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
						   _MM_FROUND_CEIL,
						   (__v8df) __A, -1,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
						  _MM_FROUND_FLOOR,
						  (__v16sf) __W, __U,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
						   _MM_FROUND_FLOOR,
						   (__v8df) __W, __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
						  _MM_FROUND_CEIL,
						  (__v16sf) __W, __U,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
						   _MM_FROUND_CEIL,
						   (__v8df) __W, __U,
						   _MM_FROUND_CUR_DIRECTION);
}

#ifdef __OPTIMIZE__
/* valign: concatenate __A:__B and shift right by __imm 32-bit (epi32)
   or 64-bit (epi64) elements; plain/mask/maskz variants.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
						  (__v16si) __B, __imm,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			  __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
						  (__v16si) __B, __imm,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
						  (__v16si) __B, __imm,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16)
						  __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
						  (__v8di) __B, __imm,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			  __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
						  (__v8di) __B, __imm,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
						  (__v8di) __B, __imm,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
/* Macro forms of the valign intrinsics for non-optimizing builds.  */
#define _mm512_alignr_epi32(X, Y, C)                                    \
  ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),       \
    (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C)                         \
  ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),       \
    (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W),             \
    (__mmask16)(U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C)                           \
  ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),       \
    (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
    (__mmask16)(U)))

#define _mm512_alignr_epi64(X, Y, C)                                    \
  ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),        \
    (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (),  \
    (__mmask8)-1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C)                         \
  ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),        \
    (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C)                           \
  ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),        \
    (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif

/* Integer element compares producing mask registers.  eq/gt use the
   dedicated pcmpeq/pcmpgt builtins; the other relations (below) go
   through the generic cmp builtins with a predicate immediate.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
						     (__v16si) __B, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
						    (__v8di) __B, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
						    (__v8di) __B,
						    (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
						     (__v16si) __B, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
						    (__v8di) __B, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
						    (__v8di) __B,
						    (__mmask8) -1);
}

/* cmpge family: generic cmp builtins with predicate 5
   (_MM_CMPINT_NLT / _MM_CMPINT_GE, defined further below).  cmpd/cmpq
   are the signed forms, ucmpd/ucmpq the unsigned ones.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, 5,
						  (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, 5,
						  (__mmask16) __M);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, 5,
						   (__mmask16) __M);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, 5,
						   (__mmask16) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, 5,
						 (__mmask8) __M);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, 5,
						 (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 5,
						  (__mmask8) __M);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 5,
						  (__mmask8) -1);
}

/* cmple family: predicate 2 (_MM_CMPINT_LE).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, 2,
						  (__mmask16) __M);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, 2,
						  (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__mmask16)
    __builtin_ia32_ucmpd512_mask ((__v16si) __X,
				  (__v16si) __Y, 2,
				  (__mmask16) __M);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, 2,
						   (__mmask16) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, 2,
						 (__mmask8) __M);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, 2,
						 (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 2,
						  (__mmask8) __M);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 2,
						  (__mmask8) -1);
}

/* cmplt family: predicate 1 (_MM_CMPINT_LT).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, 1,
						  (__mmask16) __M);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, 1,
						  (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, 1,
						   (__mmask16) __M);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, 1,
						   (__mmask16) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, 1,
						 (__mmask8) __M);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, 1,
						 (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 1,
						  (__mmask8) __M);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 1,
						  (__mmask8) -1);
}

/* cmpneq family: predicate 4 (_MM_CMPINT_NE, defined just below).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, 4,
						  (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, 4,
						  (__mmask16) __M);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, 4,
						   (__mmask16) __M);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, 4,
						   (__mmask16) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, 4,
						 (__mmask8) __M);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, 4,
						 (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 4,
						  (__mmask8) __M);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 4,
						  (__mmask8) -1);
}

/* Predicate immediates for the generic _mm512_cmp_* intrinsics.
   Note NLT/GE and NLE/GT are aliases for the same encodings.  */
#define _MM_CMPINT_EQ	    0x0
#define _MM_CMPINT_LT	    0x1
#define _MM_CMPINT_LE	    0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE	    0x4
#define _MM_CMPINT_NLT	    0x5
#define _MM_CMPINT_GE	    0x5
#define _MM_CMPINT_NLE	    0x6
#define _MM_CMPINT_GT	    0x6

#ifdef __OPTIMIZE__
/* Mask-register shifts by an immediate count __B.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
{
  return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
						(__mmask8) __B);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
{
  return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
						(__mmask8) __B);
}

/* Generic compares taking a _MM_CMPINT_* predicate __P.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, __P,
						 (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, __P,
						  (__mmask16) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, __P,
						  (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, __P,
						   (__mmask16) -1);
}

/* Floating-point compares with predicate __P and explicit rounding
   mode __R.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
			  const int __R)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
						  (__v8df) __Y, __P,
						  (__mmask8) -1, __R);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
						   (__v16sf) __Y, __P,
						   (__mmask16) -1, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
			    const int __P)
{
  return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						 (__v8di) __Y, __P,
						 (__mmask8) __U);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
			    const int __P)
{
  return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
						  (__v16si) __Y, __P,
						  (__mmask16) __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
			    const int __P)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, __P,
						  (__mmask8) __U);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
			    const int __P)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
						   (__v16si) __Y, __P,
						   (__mmask16) __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
			       const int __P, const int __R)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
						  (__v8df) __Y, __P,
						  (__mmask8) __U, __R);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
			       const int __P, const int __R)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
						   (__v16sf) __Y, __P,
						   (__mmask16) __U, __R);
}

/* Scalar compare-to-mask with explicit rounding mode.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
					       (__v2df) __Y, __P,
					       (__mmask8) -1, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
			    const int __P, const int __R)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
					       (__v2df) __Y, __P,
					       (__mmask8) __M, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__,
__always_inline__, __artificial__)) 10063 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R) 10064 { 10065 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 10066 (__v4sf) __Y, __P, 10067 (__mmask8) -1, __R); 10068 } 10069 10070 extern __inline __mmask8 10071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10072 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, 10073 const int __P, const int __R) 10074 { 10075 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 10076 (__v4sf) __Y, __P, 10077 (__mmask8) __M, __R); 10078 } 10079 10080 #else 10081 #define _kshiftli_mask16(X, Y) \ 10082 ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y))) 10083 10084 #define _kshiftri_mask16(X, Y) \ 10085 ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y))) 10086 10087 #define _mm512_cmp_epi64_mask(X, Y, P) \ 10088 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ 10089 (__v8di)(__m512i)(Y), (int)(P),\ 10090 (__mmask8)-1)) 10091 10092 #define _mm512_cmp_epi32_mask(X, Y, P) \ 10093 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ 10094 (__v16si)(__m512i)(Y), (int)(P), \ 10095 (__mmask16)-1)) 10096 10097 #define _mm512_cmp_epu64_mask(X, Y, P) \ 10098 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ 10099 (__v8di)(__m512i)(Y), (int)(P),\ 10100 (__mmask8)-1)) 10101 10102 #define _mm512_cmp_epu32_mask(X, Y, P) \ 10103 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ 10104 (__v16si)(__m512i)(Y), (int)(P), \ 10105 (__mmask16)-1)) 10106 10107 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \ 10108 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 10109 (__v8df)(__m512d)(Y), (int)(P),\ 10110 (__mmask8)-1, R)) 10111 10112 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \ 10113 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 10114 (__v16sf)(__m512)(Y), (int)(P),\ 10115 (__mmask16)-1, 
R)) 10116 10117 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \ 10118 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ 10119 (__v8di)(__m512i)(Y), (int)(P),\ 10120 (__mmask8)(M))) 10121 10122 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \ 10123 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ 10124 (__v16si)(__m512i)(Y), (int)(P), \ 10125 (__mmask16)(M))) 10126 10127 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \ 10128 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ 10129 (__v8di)(__m512i)(Y), (int)(P),\ 10130 (__mmask8)(M))) 10131 10132 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \ 10133 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ 10134 (__v16si)(__m512i)(Y), (int)(P), \ 10135 (__mmask16)(M))) 10136 10137 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \ 10138 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 10139 (__v8df)(__m512d)(Y), (int)(P),\ 10140 (__mmask8)(M), R)) 10141 10142 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \ 10143 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 10144 (__v16sf)(__m512)(Y), (int)(P),\ 10145 (__mmask16)(M), R)) 10146 10147 #define _mm_cmp_round_sd_mask(X, Y, P, R) \ 10148 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 10149 (__v2df)(__m128d)(Y), (int)(P),\ 10150 (__mmask8)-1, R)) 10151 10152 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ 10153 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 10154 (__v2df)(__m128d)(Y), (int)(P),\ 10155 (M), R)) 10156 10157 #define _mm_cmp_round_ss_mask(X, Y, P, R) \ 10158 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 10159 (__v4sf)(__m128)(Y), (int)(P), \ 10160 (__mmask8)-1, R)) 10161 10162 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ 10163 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 10164 (__v4sf)(__m128)(Y), (int)(P), \ 10165 (M), R)) 10166 #endif 10167 10168 #ifdef __OPTIMIZE__ 10169 
extern __inline __m512 10170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10171 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale) 10172 { 10173 __m512 __v1_old = _mm512_undefined_ps (); 10174 __mmask16 __mask = 0xFFFF; 10175 10176 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old, 10177 __addr, 10178 (__v16si) __index, 10179 __mask, __scale); 10180 } 10181 10182 extern __inline __m512 10183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10184 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask, 10185 __m512i __index, void const *__addr, int __scale) 10186 { 10187 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old, 10188 __addr, 10189 (__v16si) __index, 10190 __mask, __scale); 10191 } 10192 10193 extern __inline __m512d 10194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10195 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale) 10196 { 10197 __m512d __v1_old = _mm512_undefined_pd (); 10198 __mmask8 __mask = 0xFF; 10199 10200 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old, 10201 __addr, 10202 (__v8si) __index, __mask, 10203 __scale); 10204 } 10205 10206 extern __inline __m512d 10207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10208 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask, 10209 __m256i __index, void const *__addr, int __scale) 10210 { 10211 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old, 10212 __addr, 10213 (__v8si) __index, 10214 __mask, __scale); 10215 } 10216 10217 extern __inline __m256 10218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10219 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale) 10220 { 10221 __m256 __v1_old = _mm256_undefined_ps (); 10222 __mmask8 __mask = 0xFF; 10223 10224 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old, 10225 __addr, 10226 (__v8di) __index, __mask, 10227 
__scale); 10228 } 10229 10230 extern __inline __m256 10231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10232 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask, 10233 __m512i __index, void const *__addr, int __scale) 10234 { 10235 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old, 10236 __addr, 10237 (__v8di) __index, 10238 __mask, __scale); 10239 } 10240 10241 extern __inline __m512d 10242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10243 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale) 10244 { 10245 __m512d __v1_old = _mm512_undefined_pd (); 10246 __mmask8 __mask = 0xFF; 10247 10248 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old, 10249 __addr, 10250 (__v8di) __index, __mask, 10251 __scale); 10252 } 10253 10254 extern __inline __m512d 10255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10256 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask, 10257 __m512i __index, void const *__addr, int __scale) 10258 { 10259 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old, 10260 __addr, 10261 (__v8di) __index, 10262 __mask, __scale); 10263 } 10264 10265 extern __inline __m512i 10266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10267 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale) 10268 { 10269 __m512i __v1_old = _mm512_undefined_epi32 (); 10270 __mmask16 __mask = 0xFFFF; 10271 10272 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old, 10273 __addr, 10274 (__v16si) __index, 10275 __mask, __scale); 10276 } 10277 10278 extern __inline __m512i 10279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10280 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask, 10281 __m512i __index, void const *__addr, int __scale) 10282 { 10283 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old, 10284 __addr, 10285 (__v16si) __index, 10286 
__mask, __scale); 10287 } 10288 10289 extern __inline __m512i 10290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10291 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale) 10292 { 10293 __m512i __v1_old = _mm512_undefined_epi32 (); 10294 __mmask8 __mask = 0xFF; 10295 10296 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old, 10297 __addr, 10298 (__v8si) __index, __mask, 10299 __scale); 10300 } 10301 10302 extern __inline __m512i 10303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10304 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask, 10305 __m256i __index, void const *__addr, 10306 int __scale) 10307 { 10308 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old, 10309 __addr, 10310 (__v8si) __index, 10311 __mask, __scale); 10312 } 10313 10314 extern __inline __m256i 10315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10316 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale) 10317 { 10318 __m256i __v1_old = _mm256_undefined_si256 (); 10319 __mmask8 __mask = 0xFF; 10320 10321 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old, 10322 __addr, 10323 (__v8di) __index, 10324 __mask, __scale); 10325 } 10326 10327 extern __inline __m256i 10328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10329 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask, 10330 __m512i __index, void const *__addr, int __scale) 10331 { 10332 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old, 10333 __addr, 10334 (__v8di) __index, 10335 __mask, __scale); 10336 } 10337 10338 extern __inline __m512i 10339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10340 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale) 10341 { 10342 __m512i __v1_old = _mm512_undefined_epi32 (); 10343 __mmask8 __mask = 0xFF; 10344 10345 return (__m512i) __builtin_ia32_gatherdiv8di 
((__v8di) __v1_old, 10346 __addr, 10347 (__v8di) __index, __mask, 10348 __scale); 10349 } 10350 10351 extern __inline __m512i 10352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10353 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask, 10354 __m512i __index, void const *__addr, 10355 int __scale) 10356 { 10357 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old, 10358 __addr, 10359 (__v8di) __index, 10360 __mask, __scale); 10361 } 10362 10363 extern __inline void 10364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10365 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale) 10366 { 10367 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF, 10368 (__v16si) __index, (__v16sf) __v1, __scale); 10369 } 10370 10371 extern __inline void 10372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10373 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask, 10374 __m512i __index, __m512 __v1, int __scale) 10375 { 10376 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index, 10377 (__v16sf) __v1, __scale); 10378 } 10379 10380 extern __inline void 10381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10382 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1, 10383 int __scale) 10384 { 10385 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, 10386 (__v8si) __index, (__v8df) __v1, __scale); 10387 } 10388 10389 extern __inline void 10390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10391 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask, 10392 __m256i __index, __m512d __v1, int __scale) 10393 { 10394 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index, 10395 (__v8df) __v1, __scale); 10396 } 10397 10398 extern __inline void 10399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10400 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int 
__scale) 10401 { 10402 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF, 10403 (__v8di) __index, (__v8sf) __v1, __scale); 10404 } 10405 10406 extern __inline void 10407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10408 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask, 10409 __m512i __index, __m256 __v1, int __scale) 10410 { 10411 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index, 10412 (__v8sf) __v1, __scale); 10413 } 10414 10415 extern __inline void 10416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10417 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1, 10418 int __scale) 10419 { 10420 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF, 10421 (__v8di) __index, (__v8df) __v1, __scale); 10422 } 10423 10424 extern __inline void 10425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10426 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask, 10427 __m512i __index, __m512d __v1, int __scale) 10428 { 10429 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index, 10430 (__v8df) __v1, __scale); 10431 } 10432 10433 extern __inline void 10434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10435 _mm512_i32scatter_epi32 (void *__addr, __m512i __index, 10436 __m512i __v1, int __scale) 10437 { 10438 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF, 10439 (__v16si) __index, (__v16si) __v1, __scale); 10440 } 10441 10442 extern __inline void 10443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10444 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask, 10445 __m512i __index, __m512i __v1, int __scale) 10446 { 10447 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index, 10448 (__v16si) __v1, __scale); 10449 } 10450 10451 extern __inline void 10452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10453 _mm512_i32scatter_epi64 (void *__addr, __m256i __index, 10454 __m512i 
__v1, int __scale) 10455 { 10456 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF, 10457 (__v8si) __index, (__v8di) __v1, __scale); 10458 } 10459 10460 extern __inline void 10461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10462 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask, 10463 __m256i __index, __m512i __v1, int __scale) 10464 { 10465 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index, 10466 (__v8di) __v1, __scale); 10467 } 10468 10469 extern __inline void 10470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10471 _mm512_i64scatter_epi32 (void *__addr, __m512i __index, 10472 __m256i __v1, int __scale) 10473 { 10474 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF, 10475 (__v8di) __index, (__v8si) __v1, __scale); 10476 } 10477 10478 extern __inline void 10479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10480 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask, 10481 __m512i __index, __m256i __v1, int __scale) 10482 { 10483 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index, 10484 (__v8si) __v1, __scale); 10485 } 10486 10487 extern __inline void 10488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10489 _mm512_i64scatter_epi64 (void *__addr, __m512i __index, 10490 __m512i __v1, int __scale) 10491 { 10492 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF, 10493 (__v8di) __index, (__v8di) __v1, __scale); 10494 } 10495 10496 extern __inline void 10497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10498 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask, 10499 __m512i __index, __m512i __v1, int __scale) 10500 { 10501 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index, 10502 (__v8di) __v1, __scale); 10503 } 10504 #else 10505 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \ 10506 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\ 10507 (void const *) 
(ADDR), \ 10508 (__v16si)(__m512i) (INDEX), \ 10509 (__mmask16)0xFFFF, \ 10510 (int) (SCALE)) 10511 10512 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10513 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \ 10514 (void const *) (ADDR), \ 10515 (__v16si)(__m512i) (INDEX), \ 10516 (__mmask16) (MASK), \ 10517 (int) (SCALE)) 10518 10519 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \ 10520 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \ 10521 (void const *) (ADDR), \ 10522 (__v8si)(__m256i) (INDEX), \ 10523 (__mmask8)0xFF, (int) (SCALE)) 10524 10525 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10526 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \ 10527 (void const *) (ADDR), \ 10528 (__v8si)(__m256i) (INDEX), \ 10529 (__mmask8) (MASK), \ 10530 (int) (SCALE)) 10531 10532 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \ 10533 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \ 10534 (void const *) (ADDR), \ 10535 (__v8di)(__m512i) (INDEX), \ 10536 (__mmask8)0xFF, (int) (SCALE)) 10537 10538 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10539 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \ 10540 (void const *) (ADDR), \ 10541 (__v8di)(__m512i) (INDEX), \ 10542 (__mmask8) (MASK), \ 10543 (int) (SCALE)) 10544 10545 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \ 10546 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \ 10547 (void const *) (ADDR), \ 10548 (__v8di)(__m512i) (INDEX), \ 10549 (__mmask8)0xFF, (int) (SCALE)) 10550 10551 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10552 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \ 10553 (void const *) (ADDR), \ 10554 (__v8di)(__m512i) (INDEX), \ 10555 (__mmask8) (MASK), \ 10556 (int) (SCALE)) 10557 10558 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \ 10559 (__m512i) 
__builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (),\ 10560 (void const *) (ADDR), \ 10561 (__v16si)(__m512i) (INDEX), \ 10562 (__mmask16)0xFFFF, \ 10563 (int) (SCALE)) 10564 10565 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10566 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \ 10567 (void const *) (ADDR), \ 10568 (__v16si)(__m512i) (INDEX), \ 10569 (__mmask16) (MASK), \ 10570 (int) (SCALE)) 10571 10572 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \ 10573 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (),\ 10574 (void const *) (ADDR), \ 10575 (__v8si)(__m256i) (INDEX), \ 10576 (__mmask8)0xFF, (int) (SCALE)) 10577 10578 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10579 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \ 10580 (void const *) (ADDR), \ 10581 (__v8si)(__m256i) (INDEX), \ 10582 (__mmask8) (MASK), \ 10583 (int) (SCALE)) 10584 10585 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \ 10586 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(),\ 10587 (void const *) (ADDR), \ 10588 (__v8di)(__m512i) (INDEX), \ 10589 (__mmask8)0xFF, (int) (SCALE)) 10590 10591 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10592 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \ 10593 (void const *) (ADDR), \ 10594 (__v8di)(__m512i) (INDEX), \ 10595 (__mmask8) (MASK), \ 10596 (int) (SCALE)) 10597 10598 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \ 10599 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (),\ 10600 (void const *) (ADDR), \ 10601 (__v8di)(__m512i) (INDEX), \ 10602 (__mmask8)0xFF, (int) (SCALE)) 10603 10604 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10605 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \ 10606 (void const *) (ADDR), \ 10607 (__v8di)(__m512i) (INDEX), \ 10608 (__mmask8) (MASK), \ 
10609 (int) (SCALE)) 10610 10611 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \ 10612 __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF, \ 10613 (__v16si)(__m512i) (INDEX), \ 10614 (__v16sf)(__m512) (V1), (int) (SCALE)) 10615 10616 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ 10617 __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK), \ 10618 (__v16si)(__m512i) (INDEX), \ 10619 (__v16sf)(__m512) (V1), (int) (SCALE)) 10620 10621 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ 10622 __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF, \ 10623 (__v8si)(__m256i) (INDEX), \ 10624 (__v8df)(__m512d) (V1), (int) (SCALE)) 10625 10626 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 10627 __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK), \ 10628 (__v8si)(__m256i) (INDEX), \ 10629 (__v8df)(__m512d) (V1), (int) (SCALE)) 10630 10631 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ 10632 __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF, \ 10633 (__v8di)(__m512i) (INDEX), \ 10634 (__v8sf)(__m256) (V1), (int) (SCALE)) 10635 10636 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ 10637 __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask16) (MASK), \ 10638 (__v8di)(__m512i) (INDEX), \ 10639 (__v8sf)(__m256) (V1), (int) (SCALE)) 10640 10641 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \ 10642 __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF, \ 10643 (__v8di)(__m512i) (INDEX), \ 10644 (__v8df)(__m512d) (V1), (int) (SCALE)) 10645 10646 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 10647 __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK), \ 10648 (__v8di)(__m512i) (INDEX), \ 10649 (__v8df)(__m512d) (V1), (int) (SCALE)) 10650 10651 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ 10652 __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF, \ 10653 (__v16si)(__m512i) 
(INDEX), \ 10654 (__v16si)(__m512i) (V1), (int) (SCALE)) 10655 10656 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 10657 __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK), \ 10658 (__v16si)(__m512i) (INDEX), \ 10659 (__v16si)(__m512i) (V1), (int) (SCALE)) 10660 10661 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ 10662 __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF, \ 10663 (__v8si)(__m256i) (INDEX), \ 10664 (__v8di)(__m512i) (V1), (int) (SCALE)) 10665 10666 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 10667 __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK), \ 10668 (__v8si)(__m256i) (INDEX), \ 10669 (__v8di)(__m512i) (V1), (int) (SCALE)) 10670 10671 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ 10672 __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF, \ 10673 (__v8di)(__m512i) (INDEX), \ 10674 (__v8si)(__m256i) (V1), (int) (SCALE)) 10675 10676 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 10677 __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK), \ 10678 (__v8di)(__m512i) (INDEX), \ 10679 (__v8si)(__m256i) (V1), (int) (SCALE)) 10680 10681 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ 10682 __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF, \ 10683 (__v8di)(__m512i) (INDEX), \ 10684 (__v8di)(__m512i) (V1), (int) (SCALE)) 10685 10686 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 10687 __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK), \ 10688 (__v8di)(__m512i) (INDEX), \ 10689 (__v8di)(__m512i) (V1), (int) (SCALE)) 10690 #endif 10691 10692 extern __inline __m512d 10693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10694 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) 10695 { 10696 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 10697 (__v8df) __W, 10698 (__mmask8) __U); 10699 } 10700 
10701 extern __inline __m512d 10702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10703 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 10704 { 10705 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 10706 (__v8df) 10707 _mm512_setzero_pd (), 10708 (__mmask8) __U); 10709 } 10710 10711 extern __inline void 10712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10713 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) 10714 { 10715 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, 10716 (__mmask8) __U); 10717 } 10718 10719 extern __inline __m512 10720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10721 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 10722 { 10723 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 10724 (__v16sf) __W, 10725 (__mmask16) __U); 10726 } 10727 10728 extern __inline __m512 10729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10730 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 10731 { 10732 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 10733 (__v16sf) 10734 _mm512_setzero_ps (), 10735 (__mmask16) __U); 10736 } 10737 10738 extern __inline void 10739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10740 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) 10741 { 10742 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, 10743 (__mmask16) __U); 10744 } 10745 10746 extern __inline __m512i 10747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10748 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 10749 { 10750 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 10751 (__v8di) __W, 10752 (__mmask8) __U); 10753 } 10754 10755 extern __inline __m512i 10756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10757 
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 10758 { 10759 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 10760 (__v8di) 10761 _mm512_setzero_si512 (), 10762 (__mmask8) __U); 10763 } 10764 10765 extern __inline void 10766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10767 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) 10768 { 10769 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, 10770 (__mmask8) __U); 10771 } 10772 10773 extern __inline __m512i 10774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10775 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 10776 { 10777 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 10778 (__v16si) __W, 10779 (__mmask16) __U); 10780 } 10781 10782 extern __inline __m512i 10783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10784 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 10785 { 10786 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 10787 (__v16si) 10788 _mm512_setzero_si512 (), 10789 (__mmask16) __U); 10790 } 10791 10792 extern __inline void 10793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10794 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) 10795 { 10796 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, 10797 (__mmask16) __U); 10798 } 10799 10800 extern __inline __m512d 10801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10802 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 10803 { 10804 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 10805 (__v8df) __W, 10806 (__mmask8) __U); 10807 } 10808 10809 extern __inline __m512d 10810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10811 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 10812 { 10813 return (__m512d) 
__builtin_ia32_expanddf512_maskz ((__v8df) __A, 10814 (__v8df) 10815 _mm512_setzero_pd (), 10816 (__mmask8) __U); 10817 } 10818 10819 extern __inline __m512d 10820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10821 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P) 10822 { 10823 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P, 10824 (__v8df) __W, 10825 (__mmask8) __U); 10826 } 10827 10828 extern __inline __m512d 10829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10830 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P) 10831 { 10832 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P, 10833 (__v8df) 10834 _mm512_setzero_pd (), 10835 (__mmask8) __U); 10836 } 10837 10838 extern __inline __m512 10839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10840 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 10841 { 10842 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 10843 (__v16sf) __W, 10844 (__mmask16) __U); 10845 } 10846 10847 extern __inline __m512 10848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10849 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 10850 { 10851 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A, 10852 (__v16sf) 10853 _mm512_setzero_ps (), 10854 (__mmask16) __U); 10855 } 10856 10857 extern __inline __m512 10858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10859 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P) 10860 { 10861 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P, 10862 (__v16sf) __W, 10863 (__mmask16) __U); 10864 } 10865 10866 extern __inline __m512 10867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10868 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P) 10869 { 10870 return (__m512) __builtin_ia32_expandloadsf512_maskz 
((const __v16sf *) __P, 10871 (__v16sf) 10872 _mm512_setzero_ps (), 10873 (__mmask16) __U); 10874 } 10875 10876 extern __inline __m512i 10877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10878 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 10879 { 10880 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 10881 (__v8di) __W, 10882 (__mmask8) __U); 10883 } 10884 10885 extern __inline __m512i 10886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10887 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A) 10888 { 10889 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A, 10890 (__v8di) 10891 _mm512_setzero_si512 (), 10892 (__mmask8) __U); 10893 } 10894 10895 extern __inline __m512i 10896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10897 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 10898 { 10899 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P, 10900 (__v8di) __W, 10901 (__mmask8) __U); 10902 } 10903 10904 extern __inline __m512i 10905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10906 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) 10907 { 10908 return (__m512i) 10909 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P, 10910 (__v8di) 10911 _mm512_setzero_si512 (), 10912 (__mmask8) __U); 10913 } 10914 10915 extern __inline __m512i 10916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10917 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 10918 { 10919 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 10920 (__v16si) __W, 10921 (__mmask16) __U); 10922 } 10923 10924 extern __inline __m512i 10925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10926 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 10927 { 10928 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A, 10929 
(__v16si) 10930 _mm512_setzero_si512 (), 10931 (__mmask16) __U); 10932 } 10933 10934 extern __inline __m512i 10935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10936 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 10937 { 10938 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P, 10939 (__v16si) __W, 10940 (__mmask16) __U); 10941 } 10942 10943 extern __inline __m512i 10944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10945 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P) 10946 { 10947 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P, 10948 (__v16si) 10949 _mm512_setzero_si512 10950 (), (__mmask16) __U); 10951 } 10952 10953 /* Mask arithmetic operations */ 10954 #define _kand_mask16 _mm512_kand 10955 #define _kandn_mask16 _mm512_kandn 10956 #define _knot_mask16 _mm512_knot 10957 #define _kor_mask16 _mm512_kor 10958 #define _kxnor_mask16 _mm512_kxnor 10959 #define _kxor_mask16 _mm512_kxor 10960 10961 extern __inline unsigned char 10962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10963 _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF) 10964 { 10965 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B); 10966 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B); 10967 } 10968 10969 extern __inline unsigned char 10970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10971 _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B) 10972 { 10973 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A, 10974 (__mmask16) __B); 10975 } 10976 10977 extern __inline unsigned char 10978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10979 _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B) 10980 { 10981 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A, 10982 (__mmask16) __B); 10983 } 10984 10985 extern __inline unsigned int 
/* Move the 16-bit mask __A into a general-purpose register value
   (KMOVW k -> r32).  */
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_cvtmask16_u32 (__mmask16 __A)
{
  return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
}

/* Move the low 16 bits of __A into a mask register (KMOVW r32 -> k).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_cvtu32_mask16 (unsigned int __A)
{
  return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
}

/* Load a 16-bit mask from memory at __A.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_load_mask16 (__mmask16 *__A)
{
  return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
}

/* Store the 16-bit mask __B to memory at __A.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_store_mask16 (__mmask16 *__A, __mmask16 __B)
{
  *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
}

/* Bitwise AND of 16-bit masks __A and __B (KANDW).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise AND-NOT: (~__A) & __B on 16-bit masks (KANDNW).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
					     (__mmask16) __B);
}

/* Bitwise OR of 16-bit masks __A and __B (KORW).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

/* KORTESTW ZF result: nonzero iff (__A | __B) is all zeros.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
						(__mmask16) __B);
}

extern __inline int
__attribute__
((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  /* KORTESTW CF result: nonzero iff (__A | __B) has all 16 bits set.  */
  return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
						(__mmask16) __B);
}

/* Bitwise XNOR of 16-bit masks (KXNORW).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise XOR of 16-bit masks (KXORW).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise NOT of the 16-bit mask __A (KNOTW).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_knot (__mmask16 __A)
{
  return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
}

/* KUNPCKBW: pack the low byte of __B into bits 0-7 and the low byte
   of __A into bits 8-15 of the result mask.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

/* Alternative spelling of the same byte unpack that takes the inputs
   as 8-bit masks.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

#ifdef __OPTIMIZE__
/* Insert the 128-bit vector __D at 128-bit position __imm of __C,
   zeroing destination elements whose bit in __B is clear.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
			  const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
						    (__v4si) __D,
						    __imm,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    __B);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) 11102 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D, 11103 const int __imm) 11104 { 11105 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, 11106 (__v4sf) __D, 11107 __imm, 11108 (__v16sf) 11109 _mm512_setzero_ps (), __B); 11110 } 11111 11112 extern __inline __m512i 11113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11114 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C, 11115 __m128i __D, const int __imm) 11116 { 11117 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, 11118 (__v4si) __D, 11119 __imm, 11120 (__v16si) __A, 11121 __B); 11122 } 11123 11124 extern __inline __m512 11125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11126 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C, 11127 __m128 __D, const int __imm) 11128 { 11129 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, 11130 (__v4sf) __D, 11131 __imm, 11132 (__v16sf) __A, __B); 11133 } 11134 #else 11135 #define _mm512_maskz_insertf32x4(A, X, Y, C) \ 11136 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 11137 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \ 11138 (__mmask16)(A))) 11139 11140 #define _mm512_maskz_inserti32x4(A, X, Y, C) \ 11141 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 11142 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \ 11143 (__mmask16)(A))) 11144 11145 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \ 11146 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 11147 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \ 11148 (__mmask16)(B))) 11149 11150 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \ 11151 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 11152 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \ 11153 (__mmask16)(B))) 11154 #endif 11155 11156 extern __inline __m512i 11157 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 11158 _mm512_max_epi64 (__m512i __A, __m512i __B) 11159 { 11160 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 11161 (__v8di) __B, 11162 (__v8di) 11163 _mm512_undefined_epi32 (), 11164 (__mmask8) -1); 11165 } 11166 11167 extern __inline __m512i 11168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11169 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 11170 { 11171 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 11172 (__v8di) __B, 11173 (__v8di) 11174 _mm512_setzero_si512 (), 11175 __M); 11176 } 11177 11178 extern __inline __m512i 11179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11180 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 11181 { 11182 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 11183 (__v8di) __B, 11184 (__v8di) __W, __M); 11185 } 11186 11187 extern __inline __m512i 11188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11189 _mm512_min_epi64 (__m512i __A, __m512i __B) 11190 { 11191 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 11192 (__v8di) __B, 11193 (__v8di) 11194 _mm512_undefined_epi32 (), 11195 (__mmask8) -1); 11196 } 11197 11198 extern __inline __m512i 11199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11200 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 11201 { 11202 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 11203 (__v8di) __B, 11204 (__v8di) __W, __M); 11205 } 11206 11207 extern __inline __m512i 11208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11209 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 11210 { 11211 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 11212 (__v8di) __B, 11213 (__v8di) 11214 _mm512_setzero_si512 (), 11215 __M); 11216 } 11217 11218 extern __inline __m512i 11219 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 11220 _mm512_max_epu64 (__m512i __A, __m512i __B) 11221 { 11222 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 11223 (__v8di) __B, 11224 (__v8di) 11225 _mm512_undefined_epi32 (), 11226 (__mmask8) -1); 11227 } 11228 11229 extern __inline __m512i 11230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11231 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 11232 { 11233 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 11234 (__v8di) __B, 11235 (__v8di) 11236 _mm512_setzero_si512 (), 11237 __M); 11238 } 11239 11240 extern __inline __m512i 11241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11242 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 11243 { 11244 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 11245 (__v8di) __B, 11246 (__v8di) __W, __M); 11247 } 11248 11249 extern __inline __m512i 11250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11251 _mm512_min_epu64 (__m512i __A, __m512i __B) 11252 { 11253 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 11254 (__v8di) __B, 11255 (__v8di) 11256 _mm512_undefined_epi32 (), 11257 (__mmask8) -1); 11258 } 11259 11260 extern __inline __m512i 11261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11262 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 11263 { 11264 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 11265 (__v8di) __B, 11266 (__v8di) __W, __M); 11267 } 11268 11269 extern __inline __m512i 11270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11271 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 11272 { 11273 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 11274 (__v8di) __B, 11275 (__v8di) 11276 _mm512_setzero_si512 (), 11277 __M); 11278 } 11279 11280 extern __inline __m512i 11281 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 11282 _mm512_max_epi32 (__m512i __A, __m512i __B) 11283 { 11284 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 11285 (__v16si) __B, 11286 (__v16si) 11287 _mm512_undefined_epi32 (), 11288 (__mmask16) -1); 11289 } 11290 11291 extern __inline __m512i 11292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11293 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 11294 { 11295 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 11296 (__v16si) __B, 11297 (__v16si) 11298 _mm512_setzero_si512 (), 11299 __M); 11300 } 11301 11302 extern __inline __m512i 11303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11304 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 11305 { 11306 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 11307 (__v16si) __B, 11308 (__v16si) __W, __M); 11309 } 11310 11311 extern __inline __m512i 11312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11313 _mm512_min_epi32 (__m512i __A, __m512i __B) 11314 { 11315 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 11316 (__v16si) __B, 11317 (__v16si) 11318 _mm512_undefined_epi32 (), 11319 (__mmask16) -1); 11320 } 11321 11322 extern __inline __m512i 11323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11324 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 11325 { 11326 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 11327 (__v16si) __B, 11328 (__v16si) 11329 _mm512_setzero_si512 (), 11330 __M); 11331 } 11332 11333 extern __inline __m512i 11334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11335 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 11336 { 11337 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 11338 (__v16si) __B, 11339 (__v16si) __W, __M); 11340 } 11341 11342 extern __inline __m512i 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epu32 (__m512i __A, __m512i __B)
{
  /* Unsigned 32-bit element-wise maximum; the all-ones mask selects
     every lane, so the undefined pass-through operand is never used.  */
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* As above, but zero each destination element whose bit in __M is 0.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

/* As above, but take each destination element from __W where the bit
   in __M is 0 (merge masking).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* Unsigned 32-bit element-wise minimum of __A and __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epu32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Zero-masked variant of _mm512_min_epu32.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Merge-masked variant of _mm512_min_epu32.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* Interleave low single-precision elements of __A and __B
   (VUNPCKLPS).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpacklo_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1);
}

/* Merge-masked variant: elements with a clear bit in __U come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf) __W,
						   (__mmask16) __U);
}

/* Zero-masked variant: elements with a clear bit in __U are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U);
}

#ifdef __OPTIMIZE__
/* Scalar double-precision maximum of the low elements of __A and __B
   with an explicit rounding-mode immediate __R.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

/* Merge-masked scalar max: low result element comes from __W when the
   low bit of __U is clear.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

/* Zero-masked scalar max with rounding control.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
						    (__v2df)
__B, 11464 (__v2df) 11465 _mm_setzero_pd (), 11466 (__mmask8) __U, __R); 11467 } 11468 11469 extern __inline __m128 11470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11471 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R) 11472 { 11473 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, 11474 (__v4sf) __B, 11475 __R); 11476 } 11477 11478 extern __inline __m128 11479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11480 _mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 11481 __m128 __B, const int __R) 11482 { 11483 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, 11484 (__v4sf) __B, 11485 (__v4sf) __W, 11486 (__mmask8) __U, __R); 11487 } 11488 11489 extern __inline __m128 11490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11491 _mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 11492 const int __R) 11493 { 11494 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, 11495 (__v4sf) __B, 11496 (__v4sf) 11497 _mm_setzero_ps (), 11498 (__mmask8) __U, __R); 11499 } 11500 11501 extern __inline __m128d 11502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11503 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R) 11504 { 11505 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, 11506 (__v2df) __B, 11507 __R); 11508 } 11509 11510 extern __inline __m128d 11511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11512 _mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 11513 __m128d __B, const int __R) 11514 { 11515 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, 11516 (__v2df) __B, 11517 (__v2df) __W, 11518 (__mmask8) __U, __R); 11519 } 11520 11521 extern __inline __m128d 11522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11523 _mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 11524 const int __R) 11525 { 11526 return (__m128d) 
__builtin_ia32_minsd_mask_round ((__v2df) __A, 11527 (__v2df) __B, 11528 (__v2df) 11529 _mm_setzero_pd (), 11530 (__mmask8) __U, __R); 11531 } 11532 11533 extern __inline __m128 11534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11535 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R) 11536 { 11537 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A, 11538 (__v4sf) __B, 11539 __R); 11540 } 11541 11542 extern __inline __m128 11543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11544 _mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 11545 __m128 __B, const int __R) 11546 { 11547 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, 11548 (__v4sf) __B, 11549 (__v4sf) __W, 11550 (__mmask8) __U, __R); 11551 } 11552 11553 extern __inline __m128 11554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11555 _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 11556 const int __R) 11557 { 11558 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, 11559 (__v4sf) __B, 11560 (__v4sf) 11561 _mm_setzero_ps (), 11562 (__mmask8) __U, __R); 11563 } 11564 11565 #else 11566 #define _mm_max_round_sd(A, B, C) \ 11567 (__m128d)__builtin_ia32_maxsd_round(A, B, C) 11568 11569 #define _mm_mask_max_round_sd(W, U, A, B, C) \ 11570 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C) 11571 11572 #define _mm_maskz_max_round_sd(U, A, B, C) \ 11573 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 11574 11575 #define _mm_max_round_ss(A, B, C) \ 11576 (__m128)__builtin_ia32_maxss_round(A, B, C) 11577 11578 #define _mm_mask_max_round_ss(W, U, A, B, C) \ 11579 (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C) 11580 11581 #define _mm_maskz_max_round_ss(U, A, B, C) \ 11582 (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 11583 11584 #define _mm_min_round_sd(A, B, C) \ 11585 (__m128d)__builtin_ia32_minsd_round(A, B, C) 11586 
/* Non-optimizing (#else) branch: keep the rounding-mode argument a
   literal constant expression by defining the scalar min intrinsics
   as macros instead of inline functions.  */
#define _mm_mask_min_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)

#define _mm_maskz_min_round_sd(U, A, B, C) \
    (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)

#define _mm_min_round_ss(A, B, C) \
    (__m128)__builtin_ia32_minss_round(A, B, C)

#define _mm_mask_min_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)

#define _mm_maskz_min_round_ss(U, A, B, C) \
    (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)

#endif

/* Element-wise select (VBLENDMPD): result element i is __W[i] when
   bit i of __U is set, otherwise __A[i].  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
{
  return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
						     (__v8df) __W,
						     (__mmask8) __U);
}

/* Single-precision blend under mask __U (VBLENDMPS).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
{
  return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
						    (__v16sf) __W,
						    (__mmask16) __U);
}

/* 64-bit integer blend under mask __U (VPBLENDMQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
						    (__v8di) __W,
						    (__mmask8) __U);
}

/* 32-bit integer blend under mask __U (VPBLENDMD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
						    (__v16si) __W,
						    (__mmask16) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) 11643 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 11644 { 11645 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 11646 (__v2df) __A, 11647 (__v2df) __B, 11648 __R); 11649 } 11650 11651 extern __inline __m128 11652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11653 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 11654 { 11655 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 11656 (__v4sf) __A, 11657 (__v4sf) __B, 11658 __R); 11659 } 11660 11661 extern __inline __m128d 11662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11663 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 11664 { 11665 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 11666 (__v2df) __A, 11667 -(__v2df) __B, 11668 __R); 11669 } 11670 11671 extern __inline __m128 11672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11673 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 11674 { 11675 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 11676 (__v4sf) __A, 11677 -(__v4sf) __B, 11678 __R); 11679 } 11680 11681 extern __inline __m128d 11682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11683 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 11684 { 11685 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 11686 -(__v2df) __A, 11687 (__v2df) __B, 11688 __R); 11689 } 11690 11691 extern __inline __m128 11692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11693 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 11694 { 11695 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 11696 -(__v4sf) __A, 11697 (__v4sf) __B, 11698 __R); 11699 } 11700 11701 extern __inline __m128d 11702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11703 _mm_fnmsub_round_sd 
(__m128d __W, __m128d __A, __m128d __B, const int __R) 11704 { 11705 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 11706 -(__v2df) __A, 11707 -(__v2df) __B, 11708 __R); 11709 } 11710 11711 extern __inline __m128 11712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11713 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 11714 { 11715 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 11716 -(__v4sf) __A, 11717 -(__v4sf) __B, 11718 __R); 11719 } 11720 #else 11721 #define _mm_fmadd_round_sd(A, B, C, R) \ 11722 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R) 11723 11724 #define _mm_fmadd_round_ss(A, B, C, R) \ 11725 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R) 11726 11727 #define _mm_fmsub_round_sd(A, B, C, R) \ 11728 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R) 11729 11730 #define _mm_fmsub_round_ss(A, B, C, R) \ 11731 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R) 11732 11733 #define _mm_fnmadd_round_sd(A, B, C, R) \ 11734 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R) 11735 11736 #define _mm_fnmadd_round_ss(A, B, C, R) \ 11737 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R) 11738 11739 #define _mm_fnmsub_round_sd(A, B, C, R) \ 11740 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R) 11741 11742 #define _mm_fnmsub_round_ss(A, B, C, R) \ 11743 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R) 11744 #endif 11745 11746 extern __inline __m128d 11747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11748 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 11749 { 11750 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, 11751 (__v2df) __A, 11752 (__v2df) __B, 11753 (__mmask8) __U, 11754 _MM_FROUND_CUR_DIRECTION); 11755 } 11756 11757 extern __inline __m128 11758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11759 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 
__B) 11760 { 11761 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 11762 (__v4sf) __A, 11763 (__v4sf) __B, 11764 (__mmask8) __U, 11765 _MM_FROUND_CUR_DIRECTION); 11766 } 11767 11768 extern __inline __m128d 11769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11770 _mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) 11771 { 11772 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, 11773 (__v2df) __A, 11774 (__v2df) __B, 11775 (__mmask8) __U, 11776 _MM_FROUND_CUR_DIRECTION); 11777 } 11778 11779 extern __inline __m128 11780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11781 _mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) 11782 { 11783 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, 11784 (__v4sf) __A, 11785 (__v4sf) __B, 11786 (__mmask8) __U, 11787 _MM_FROUND_CUR_DIRECTION); 11788 } 11789 11790 extern __inline __m128d 11791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11792 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) 11793 { 11794 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, 11795 (__v2df) __A, 11796 (__v2df) __B, 11797 (__mmask8) __U, 11798 _MM_FROUND_CUR_DIRECTION); 11799 } 11800 11801 extern __inline __m128 11802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11803 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) 11804 { 11805 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, 11806 (__v4sf) __A, 11807 (__v4sf) __B, 11808 (__mmask8) __U, 11809 _MM_FROUND_CUR_DIRECTION); 11810 } 11811 11812 extern __inline __m128d 11813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11814 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 11815 { 11816 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, 11817 (__v2df) __A, 11818 -(__v2df) __B, 11819 (__mmask8) __U, 11820 
_MM_FROUND_CUR_DIRECTION); 11821 } 11822 11823 extern __inline __m128 11824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11825 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 11826 { 11827 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 11828 (__v4sf) __A, 11829 -(__v4sf) __B, 11830 (__mmask8) __U, 11831 _MM_FROUND_CUR_DIRECTION); 11832 } 11833 11834 extern __inline __m128d 11835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11836 _mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) 11837 { 11838 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, 11839 (__v2df) __A, 11840 (__v2df) __B, 11841 (__mmask8) __U, 11842 _MM_FROUND_CUR_DIRECTION); 11843 } 11844 11845 extern __inline __m128 11846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11847 _mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) 11848 { 11849 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, 11850 (__v4sf) __A, 11851 (__v4sf) __B, 11852 (__mmask8) __U, 11853 _MM_FROUND_CUR_DIRECTION); 11854 } 11855 11856 extern __inline __m128d 11857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11858 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) 11859 { 11860 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, 11861 (__v2df) __A, 11862 -(__v2df) __B, 11863 (__mmask8) __U, 11864 _MM_FROUND_CUR_DIRECTION); 11865 } 11866 11867 extern __inline __m128 11868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11869 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) 11870 { 11871 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, 11872 (__v4sf) __A, 11873 -(__v4sf) __B, 11874 (__mmask8) __U, 11875 _MM_FROUND_CUR_DIRECTION); 11876 } 11877 11878 extern __inline __m128d 11879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11880 
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 11881 { 11882 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, 11883 -(__v2df) __A, 11884 (__v2df) __B, 11885 (__mmask8) __U, 11886 _MM_FROUND_CUR_DIRECTION); 11887 } 11888 11889 extern __inline __m128 11890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11891 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 11892 { 11893 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 11894 -(__v4sf) __A, 11895 (__v4sf) __B, 11896 (__mmask8) __U, 11897 _MM_FROUND_CUR_DIRECTION); 11898 } 11899 11900 extern __inline __m128d 11901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11902 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) 11903 { 11904 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, 11905 -(__v2df) __A, 11906 (__v2df) __B, 11907 (__mmask8) __U, 11908 _MM_FROUND_CUR_DIRECTION); 11909 } 11910 11911 extern __inline __m128 11912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11913 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) 11914 { 11915 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, 11916 -(__v4sf) __A, 11917 (__v4sf) __B, 11918 (__mmask8) __U, 11919 _MM_FROUND_CUR_DIRECTION); 11920 } 11921 11922 extern __inline __m128d 11923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11924 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) 11925 { 11926 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, 11927 -(__v2df) __A, 11928 (__v2df) __B, 11929 (__mmask8) __U, 11930 _MM_FROUND_CUR_DIRECTION); 11931 } 11932 11933 extern __inline __m128 11934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11935 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) 11936 { 11937 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, 11938 
-(__v4sf) __A, 11939 (__v4sf) __B, 11940 (__mmask8) __U, 11941 _MM_FROUND_CUR_DIRECTION); 11942 } 11943 11944 extern __inline __m128d 11945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11946 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 11947 { 11948 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, 11949 -(__v2df) __A, 11950 -(__v2df) __B, 11951 (__mmask8) __U, 11952 _MM_FROUND_CUR_DIRECTION); 11953 } 11954 11955 extern __inline __m128 11956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11957 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 11958 { 11959 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 11960 -(__v4sf) __A, 11961 -(__v4sf) __B, 11962 (__mmask8) __U, 11963 _MM_FROUND_CUR_DIRECTION); 11964 } 11965 11966 extern __inline __m128d 11967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11968 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) 11969 { 11970 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, 11971 -(__v2df) __A, 11972 (__v2df) __B, 11973 (__mmask8) __U, 11974 _MM_FROUND_CUR_DIRECTION); 11975 } 11976 11977 extern __inline __m128 11978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11979 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) 11980 { 11981 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, 11982 -(__v4sf) __A, 11983 (__v4sf) __B, 11984 (__mmask8) __U, 11985 _MM_FROUND_CUR_DIRECTION); 11986 } 11987 11988 extern __inline __m128d 11989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11990 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) 11991 { 11992 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, 11993 -(__v2df) __A, 11994 -(__v2df) __B, 11995 (__mmask8) __U, 11996 _MM_FROUND_CUR_DIRECTION); 11997 } 11998 11999 extern __inline __m128 12000 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 12001 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) 12002 { 12003 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, 12004 -(__v4sf) __A, 12005 -(__v4sf) __B, 12006 (__mmask8) __U, 12007 _MM_FROUND_CUR_DIRECTION); 12008 } 12009 12010 #ifdef __OPTIMIZE__ 12011 extern __inline __m128d 12012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12013 _mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 12014 const int __R) 12015 { 12016 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, 12017 (__v2df) __A, 12018 (__v2df) __B, 12019 (__mmask8) __U, __R); 12020 } 12021 12022 extern __inline __m128 12023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12024 _mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 12025 const int __R) 12026 { 12027 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 12028 (__v4sf) __A, 12029 (__v4sf) __B, 12030 (__mmask8) __U, __R); 12031 } 12032 12033 extern __inline __m128d 12034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12035 _mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U, 12036 const int __R) 12037 { 12038 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, 12039 (__v2df) __A, 12040 (__v2df) __B, 12041 (__mmask8) __U, __R); 12042 } 12043 12044 extern __inline __m128 12045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12046 _mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, 12047 const int __R) 12048 { 12049 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, 12050 (__v4sf) __A, 12051 (__v4sf) __B, 12052 (__mmask8) __U, __R); 12053 } 12054 12055 extern __inline __m128d 12056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12057 _mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B, 
12058 const int __R) 12059 { 12060 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, 12061 (__v2df) __A, 12062 (__v2df) __B, 12063 (__mmask8) __U, __R); 12064 } 12065 12066 extern __inline __m128 12067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12068 _mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, 12069 const int __R) 12070 { 12071 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, 12072 (__v4sf) __A, 12073 (__v4sf) __B, 12074 (__mmask8) __U, __R); 12075 } 12076 12077 extern __inline __m128d 12078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12079 _mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 12080 const int __R) 12081 { 12082 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, 12083 (__v2df) __A, 12084 -(__v2df) __B, 12085 (__mmask8) __U, __R); 12086 } 12087 12088 extern __inline __m128 12089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12090 _mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 12091 const int __R) 12092 { 12093 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 12094 (__v4sf) __A, 12095 -(__v4sf) __B, 12096 (__mmask8) __U, __R); 12097 } 12098 12099 extern __inline __m128d 12100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12101 _mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U, 12102 const int __R) 12103 { 12104 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, 12105 (__v2df) __A, 12106 (__v2df) __B, 12107 (__mmask8) __U, __R); 12108 } 12109 12110 extern __inline __m128 12111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12112 _mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, 12113 const int __R) 12114 { 12115 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, 12116 (__v4sf) __A, 12117 (__v4sf) __B, 12118 (__mmask8) __U, __R); 12119 
} 12120 12121 extern __inline __m128d 12122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12123 _mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B, 12124 const int __R) 12125 { 12126 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, 12127 (__v2df) __A, 12128 -(__v2df) __B, 12129 (__mmask8) __U, __R); 12130 } 12131 12132 extern __inline __m128 12133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12134 _mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, 12135 const int __R) 12136 { 12137 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, 12138 (__v4sf) __A, 12139 -(__v4sf) __B, 12140 (__mmask8) __U, __R); 12141 } 12142 12143 extern __inline __m128d 12144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12145 _mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 12146 const int __R) 12147 { 12148 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, 12149 -(__v2df) __A, 12150 (__v2df) __B, 12151 (__mmask8) __U, __R); 12152 } 12153 12154 extern __inline __m128 12155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12156 _mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 12157 const int __R) 12158 { 12159 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 12160 -(__v4sf) __A, 12161 (__v4sf) __B, 12162 (__mmask8) __U, __R); 12163 } 12164 12165 extern __inline __m128d 12166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12167 _mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U, 12168 const int __R) 12169 { 12170 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, 12171 -(__v2df) __A, 12172 (__v2df) __B, 12173 (__mmask8) __U, __R); 12174 } 12175 12176 extern __inline __m128 12177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12178 _mm_mask3_fnmadd_round_ss (__m128 __W, 
__m128 __A, __m128 __B, __mmask8 __U, 12179 const int __R) 12180 { 12181 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, 12182 -(__v4sf) __A, 12183 (__v4sf) __B, 12184 (__mmask8) __U, __R); 12185 } 12186 12187 extern __inline __m128d 12188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12189 _mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B, 12190 const int __R) 12191 { 12192 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, 12193 -(__v2df) __A, 12194 (__v2df) __B, 12195 (__mmask8) __U, __R); 12196 } 12197 12198 extern __inline __m128 12199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12200 _mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, 12201 const int __R) 12202 { 12203 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, 12204 -(__v4sf) __A, 12205 (__v4sf) __B, 12206 (__mmask8) __U, __R); 12207 } 12208 12209 extern __inline __m128d 12210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12211 _mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 12212 const int __R) 12213 { 12214 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, 12215 -(__v2df) __A, 12216 -(__v2df) __B, 12217 (__mmask8) __U, __R); 12218 } 12219 12220 extern __inline __m128 12221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12222 _mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 12223 const int __R) 12224 { 12225 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, 12226 -(__v4sf) __A, 12227 -(__v4sf) __B, 12228 (__mmask8) __U, __R); 12229 } 12230 12231 extern __inline __m128d 12232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12233 _mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U, 12234 const int __R) 12235 { 12236 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, 12237 -(__v2df) __A, 
12238 (__v2df) __B, 12239 (__mmask8) __U, __R); 12240 } 12241 12242 extern __inline __m128 12243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12244 _mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, 12245 const int __R) 12246 { 12247 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, 12248 -(__v4sf) __A, 12249 (__v4sf) __B, 12250 (__mmask8) __U, __R); 12251 } 12252 12253 extern __inline __m128d 12254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12255 _mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B, 12256 const int __R) 12257 { 12258 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, 12259 -(__v2df) __A, 12260 -(__v2df) __B, 12261 (__mmask8) __U, __R); 12262 } 12263 12264 extern __inline __m128 12265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12266 _mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, 12267 const int __R) 12268 { 12269 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, 12270 -(__v4sf) __A, 12271 -(__v4sf) __B, 12272 (__mmask8) __U, __R); 12273 } 12274 #else 12275 #define _mm_mask_fmadd_round_sd(A, U, B, C, R) \ 12276 (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R) 12277 12278 #define _mm_mask_fmadd_round_ss(A, U, B, C, R) \ 12279 (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R) 12280 12281 #define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \ 12282 (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R) 12283 12284 #define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \ 12285 (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R) 12286 12287 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ 12288 (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R) 12289 12290 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ 12291 (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R) 12292 12293 #define _mm_mask_fmsub_round_sd(A, U, B, C, R) \ 12294 (__m128d) 
__builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R) 12295 12296 #define _mm_mask_fmsub_round_ss(A, U, B, C, R) \ 12297 (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R) 12298 12299 #define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \ 12300 (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R) 12301 12302 #define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \ 12303 (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R) 12304 12305 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ 12306 (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R) 12307 12308 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ 12309 (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R) 12310 12311 #define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \ 12312 (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R) 12313 12314 #define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \ 12315 (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R) 12316 12317 #define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \ 12318 (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R) 12319 12320 #define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \ 12321 (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R) 12322 12323 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ 12324 (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R) 12325 12326 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ 12327 (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R) 12328 12329 #define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \ 12330 (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R) 12331 12332 #define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \ 12333 (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R) 12334 12335 #define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \ 12336 (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R) 12337 12338 #define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \ 12339 (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R) 12340 12341 #define 
_mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ 12342 (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R) 12343 12344 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ 12345 (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R) 12346 #endif 12347 12348 #ifdef __OPTIMIZE__ 12349 extern __inline int 12350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12351 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R) 12352 { 12353 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R); 12354 } 12355 12356 extern __inline int 12357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12358 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R) 12359 { 12360 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R); 12361 } 12362 #else 12363 #define _mm_comi_round_ss(A, B, C, D)\ 12364 __builtin_ia32_vcomiss(A, B, C, D) 12365 #define _mm_comi_round_sd(A, B, C, D)\ 12366 __builtin_ia32_vcomisd(A, B, C, D) 12367 #endif 12368 12369 extern __inline __m512d 12370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12371 _mm512_sqrt_pd (__m512d __A) 12372 { 12373 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 12374 (__v8df) 12375 _mm512_undefined_pd (), 12376 (__mmask8) -1, 12377 _MM_FROUND_CUR_DIRECTION); 12378 } 12379 12380 extern __inline __m512d 12381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12382 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) 12383 { 12384 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 12385 (__v8df) __W, 12386 (__mmask8) __U, 12387 _MM_FROUND_CUR_DIRECTION); 12388 } 12389 12390 extern __inline __m512d 12391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12392 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) 12393 { 12394 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 12395 (__v8df) 12396 _mm512_setzero_pd (), 12397 (__mmask8) 
__U, 12398 _MM_FROUND_CUR_DIRECTION); 12399 } 12400 12401 extern __inline __m512 12402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12403 _mm512_sqrt_ps (__m512 __A) 12404 { 12405 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 12406 (__v16sf) 12407 _mm512_undefined_ps (), 12408 (__mmask16) -1, 12409 _MM_FROUND_CUR_DIRECTION); 12410 } 12411 12412 extern __inline __m512 12413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12414 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A) 12415 { 12416 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 12417 (__v16sf) __W, 12418 (__mmask16) __U, 12419 _MM_FROUND_CUR_DIRECTION); 12420 } 12421 12422 extern __inline __m512 12423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12424 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A) 12425 { 12426 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 12427 (__v16sf) 12428 _mm512_setzero_ps (), 12429 (__mmask16) __U, 12430 _MM_FROUND_CUR_DIRECTION); 12431 } 12432 12433 extern __inline __m512d 12434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12435 _mm512_add_pd (__m512d __A, __m512d __B) 12436 { 12437 return (__m512d) ((__v8df)__A + (__v8df)__B); 12438 } 12439 12440 extern __inline __m512d 12441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12442 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 12443 { 12444 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 12445 (__v8df) __B, 12446 (__v8df) __W, 12447 (__mmask8) __U, 12448 _MM_FROUND_CUR_DIRECTION); 12449 } 12450 12451 extern __inline __m512d 12452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12453 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B) 12454 { 12455 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 12456 (__v8df) __B, 12457 (__v8df) 12458 _mm512_setzero_pd (), 12459 (__mmask8) __U, 12460 
_MM_FROUND_CUR_DIRECTION); 12461 } 12462 12463 extern __inline __m512 12464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12465 _mm512_add_ps (__m512 __A, __m512 __B) 12466 { 12467 return (__m512) ((__v16sf)__A + (__v16sf)__B); 12468 } 12469 12470 extern __inline __m512 12471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12472 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 12473 { 12474 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 12475 (__v16sf) __B, 12476 (__v16sf) __W, 12477 (__mmask16) __U, 12478 _MM_FROUND_CUR_DIRECTION); 12479 } 12480 12481 extern __inline __m512 12482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12483 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B) 12484 { 12485 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 12486 (__v16sf) __B, 12487 (__v16sf) 12488 _mm512_setzero_ps (), 12489 (__mmask16) __U, 12490 _MM_FROUND_CUR_DIRECTION); 12491 } 12492 12493 extern __inline __m128d 12494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12495 _mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 12496 { 12497 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, 12498 (__v2df) __B, 12499 (__v2df) __W, 12500 (__mmask8) __U, 12501 _MM_FROUND_CUR_DIRECTION); 12502 } 12503 12504 extern __inline __m128d 12505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12506 _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B) 12507 { 12508 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, 12509 (__v2df) __B, 12510 (__v2df) 12511 _mm_setzero_pd (), 12512 (__mmask8) __U, 12513 _MM_FROUND_CUR_DIRECTION); 12514 } 12515 12516 extern __inline __m128 12517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12518 _mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 12519 { 12520 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) 
__A, 12521 (__v4sf) __B, 12522 (__v4sf) __W, 12523 (__mmask8) __U, 12524 _MM_FROUND_CUR_DIRECTION); 12525 } 12526 12527 extern __inline __m128 12528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12529 _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B) 12530 { 12531 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, 12532 (__v4sf) __B, 12533 (__v4sf) 12534 _mm_setzero_ps (), 12535 (__mmask8) __U, 12536 _MM_FROUND_CUR_DIRECTION); 12537 } 12538 12539 extern __inline __m512d 12540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12541 _mm512_sub_pd (__m512d __A, __m512d __B) 12542 { 12543 return (__m512d) ((__v8df)__A - (__v8df)__B); 12544 } 12545 12546 extern __inline __m512d 12547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12548 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 12549 { 12550 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 12551 (__v8df) __B, 12552 (__v8df) __W, 12553 (__mmask8) __U, 12554 _MM_FROUND_CUR_DIRECTION); 12555 } 12556 12557 extern __inline __m512d 12558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12559 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B) 12560 { 12561 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 12562 (__v8df) __B, 12563 (__v8df) 12564 _mm512_setzero_pd (), 12565 (__mmask8) __U, 12566 _MM_FROUND_CUR_DIRECTION); 12567 } 12568 12569 extern __inline __m512 12570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12571 _mm512_sub_ps (__m512 __A, __m512 __B) 12572 { 12573 return (__m512) ((__v16sf)__A - (__v16sf)__B); 12574 } 12575 12576 extern __inline __m512 12577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12578 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 12579 { 12580 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 12581 (__v16sf) __B, 12582 (__v16sf) __W, 12583 (__mmask16) __U, 
12584 _MM_FROUND_CUR_DIRECTION); 12585 } 12586 12587 extern __inline __m512 12588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12589 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B) 12590 { 12591 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 12592 (__v16sf) __B, 12593 (__v16sf) 12594 _mm512_setzero_ps (), 12595 (__mmask16) __U, 12596 _MM_FROUND_CUR_DIRECTION); 12597 } 12598 12599 extern __inline __m128d 12600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12601 _mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 12602 { 12603 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, 12604 (__v2df) __B, 12605 (__v2df) __W, 12606 (__mmask8) __U, 12607 _MM_FROUND_CUR_DIRECTION); 12608 } 12609 12610 extern __inline __m128d 12611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12612 _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B) 12613 { 12614 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, 12615 (__v2df) __B, 12616 (__v2df) 12617 _mm_setzero_pd (), 12618 (__mmask8) __U, 12619 _MM_FROUND_CUR_DIRECTION); 12620 } 12621 12622 extern __inline __m128 12623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12624 _mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 12625 { 12626 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, 12627 (__v4sf) __B, 12628 (__v4sf) __W, 12629 (__mmask8) __U, 12630 _MM_FROUND_CUR_DIRECTION); 12631 } 12632 12633 extern __inline __m128 12634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12635 _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B) 12636 { 12637 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, 12638 (__v4sf) __B, 12639 (__v4sf) 12640 _mm_setzero_ps (), 12641 (__mmask8) __U, 12642 _MM_FROUND_CUR_DIRECTION); 12643 } 12644 12645 extern __inline __m512d 12646 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 12647 _mm512_mul_pd (__m512d __A, __m512d __B) 12648 { 12649 return (__m512d) ((__v8df)__A * (__v8df)__B); 12650 } 12651 12652 extern __inline __m512d 12653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12654 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 12655 { 12656 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 12657 (__v8df) __B, 12658 (__v8df) __W, 12659 (__mmask8) __U, 12660 _MM_FROUND_CUR_DIRECTION); 12661 } 12662 12663 extern __inline __m512d 12664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12665 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B) 12666 { 12667 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 12668 (__v8df) __B, 12669 (__v8df) 12670 _mm512_setzero_pd (), 12671 (__mmask8) __U, 12672 _MM_FROUND_CUR_DIRECTION); 12673 } 12674 12675 extern __inline __m512 12676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12677 _mm512_mul_ps (__m512 __A, __m512 __B) 12678 { 12679 return (__m512) ((__v16sf)__A * (__v16sf)__B); 12680 } 12681 12682 extern __inline __m512 12683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12684 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 12685 { 12686 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 12687 (__v16sf) __B, 12688 (__v16sf) __W, 12689 (__mmask16) __U, 12690 _MM_FROUND_CUR_DIRECTION); 12691 } 12692 12693 extern __inline __m512 12694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12695 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B) 12696 { 12697 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 12698 (__v16sf) __B, 12699 (__v16sf) 12700 _mm512_setzero_ps (), 12701 (__mmask16) __U, 12702 _MM_FROUND_CUR_DIRECTION); 12703 } 12704 12705 extern __inline __m128d 12706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12707 _mm_mask_mul_sd (__m128d __W, 
__mmask8 __U, __m128d __A, 12708 __m128d __B) 12709 { 12710 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, 12711 (__v2df) __B, 12712 (__v2df) __W, 12713 (__mmask8) __U, 12714 _MM_FROUND_CUR_DIRECTION); 12715 } 12716 12717 extern __inline __m128d 12718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12719 _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B) 12720 { 12721 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, 12722 (__v2df) __B, 12723 (__v2df) 12724 _mm_setzero_pd (), 12725 (__mmask8) __U, 12726 _MM_FROUND_CUR_DIRECTION); 12727 } 12728 12729 extern __inline __m128 12730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12731 _mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A, 12732 __m128 __B) 12733 { 12734 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, 12735 (__v4sf) __B, 12736 (__v4sf) __W, 12737 (__mmask8) __U, 12738 _MM_FROUND_CUR_DIRECTION); 12739 } 12740 12741 extern __inline __m128 12742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12743 _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B) 12744 { 12745 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, 12746 (__v4sf) __B, 12747 (__v4sf) 12748 _mm_setzero_ps (), 12749 (__mmask8) __U, 12750 _MM_FROUND_CUR_DIRECTION); 12751 } 12752 12753 extern __inline __m512d 12754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12755 _mm512_div_pd (__m512d __M, __m512d __V) 12756 { 12757 return (__m512d) ((__v8df)__M / (__v8df)__V); 12758 } 12759 12760 extern __inline __m512d 12761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12762 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V) 12763 { 12764 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 12765 (__v8df) __V, 12766 (__v8df) __W, 12767 (__mmask8) __U, 12768 _MM_FROUND_CUR_DIRECTION); 12769 } 12770 12771 extern __inline __m512d 12772 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 12773 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V) 12774 { 12775 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 12776 (__v8df) __V, 12777 (__v8df) 12778 _mm512_setzero_pd (), 12779 (__mmask8) __U, 12780 _MM_FROUND_CUR_DIRECTION); 12781 } 12782 12783 extern __inline __m512 12784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12785 _mm512_div_ps (__m512 __A, __m512 __B) 12786 { 12787 return (__m512) ((__v16sf)__A / (__v16sf)__B); 12788 } 12789 12790 extern __inline __m512 12791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12792 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 12793 { 12794 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 12795 (__v16sf) __B, 12796 (__v16sf) __W, 12797 (__mmask16) __U, 12798 _MM_FROUND_CUR_DIRECTION); 12799 } 12800 12801 extern __inline __m512 12802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12803 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B) 12804 { 12805 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 12806 (__v16sf) __B, 12807 (__v16sf) 12808 _mm512_setzero_ps (), 12809 (__mmask16) __U, 12810 _MM_FROUND_CUR_DIRECTION); 12811 } 12812 12813 extern __inline __m128d 12814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12815 _mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A, 12816 __m128d __B) 12817 { 12818 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, 12819 (__v2df) __B, 12820 (__v2df) __W, 12821 (__mmask8) __U, 12822 _MM_FROUND_CUR_DIRECTION); 12823 } 12824 12825 extern __inline __m128d 12826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12827 _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B) 12828 { 12829 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, 12830 (__v2df) __B, 12831 (__v2df) 12832 _mm_setzero_pd (), 12833 (__mmask8) __U, 
12834 _MM_FROUND_CUR_DIRECTION); 12835 } 12836 12837 extern __inline __m128 12838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12839 _mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A, 12840 __m128 __B) 12841 { 12842 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, 12843 (__v4sf) __B, 12844 (__v4sf) __W, 12845 (__mmask8) __U, 12846 _MM_FROUND_CUR_DIRECTION); 12847 } 12848 12849 extern __inline __m128 12850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12851 _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B) 12852 { 12853 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, 12854 (__v4sf) __B, 12855 (__v4sf) 12856 _mm_setzero_ps (), 12857 (__mmask8) __U, 12858 _MM_FROUND_CUR_DIRECTION); 12859 } 12860 12861 extern __inline __m512d 12862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12863 _mm512_max_pd (__m512d __A, __m512d __B) 12864 { 12865 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 12866 (__v8df) __B, 12867 (__v8df) 12868 _mm512_undefined_pd (), 12869 (__mmask8) -1, 12870 _MM_FROUND_CUR_DIRECTION); 12871 } 12872 12873 extern __inline __m512d 12874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12875 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 12876 { 12877 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 12878 (__v8df) __B, 12879 (__v8df) __W, 12880 (__mmask8) __U, 12881 _MM_FROUND_CUR_DIRECTION); 12882 } 12883 12884 extern __inline __m512d 12885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12886 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) 12887 { 12888 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 12889 (__v8df) __B, 12890 (__v8df) 12891 _mm512_setzero_pd (), 12892 (__mmask8) __U, 12893 _MM_FROUND_CUR_DIRECTION); 12894 } 12895 12896 extern __inline __m512 12897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12898 
_mm512_max_ps (__m512 __A, __m512 __B) 12899 { 12900 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 12901 (__v16sf) __B, 12902 (__v16sf) 12903 _mm512_undefined_ps (), 12904 (__mmask16) -1, 12905 _MM_FROUND_CUR_DIRECTION); 12906 } 12907 12908 extern __inline __m512 12909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12910 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 12911 { 12912 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 12913 (__v16sf) __B, 12914 (__v16sf) __W, 12915 (__mmask16) __U, 12916 _MM_FROUND_CUR_DIRECTION); 12917 } 12918 12919 extern __inline __m512 12920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12921 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) 12922 { 12923 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 12924 (__v16sf) __B, 12925 (__v16sf) 12926 _mm512_setzero_ps (), 12927 (__mmask16) __U, 12928 _MM_FROUND_CUR_DIRECTION); 12929 } 12930 12931 extern __inline __m128d 12932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12933 _mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 12934 { 12935 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, 12936 (__v2df) __B, 12937 (__v2df) __W, 12938 (__mmask8) __U, 12939 _MM_FROUND_CUR_DIRECTION); 12940 } 12941 12942 extern __inline __m128d 12943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12944 _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B) 12945 { 12946 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, 12947 (__v2df) __B, 12948 (__v2df) 12949 _mm_setzero_pd (), 12950 (__mmask8) __U, 12951 _MM_FROUND_CUR_DIRECTION); 12952 } 12953 12954 extern __inline __m128 12955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12956 _mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 12957 { 12958 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, 12959 
(__v4sf) __B, 12960 (__v4sf) __W, 12961 (__mmask8) __U, 12962 _MM_FROUND_CUR_DIRECTION); 12963 } 12964 12965 extern __inline __m128 12966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12967 _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B) 12968 { 12969 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, 12970 (__v4sf) __B, 12971 (__v4sf) 12972 _mm_setzero_ps (), 12973 (__mmask8) __U, 12974 _MM_FROUND_CUR_DIRECTION); 12975 } 12976 12977 extern __inline __m512d 12978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12979 _mm512_min_pd (__m512d __A, __m512d __B) 12980 { 12981 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 12982 (__v8df) __B, 12983 (__v8df) 12984 _mm512_undefined_pd (), 12985 (__mmask8) -1, 12986 _MM_FROUND_CUR_DIRECTION); 12987 } 12988 12989 extern __inline __m512d 12990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12991 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 12992 { 12993 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 12994 (__v8df) __B, 12995 (__v8df) __W, 12996 (__mmask8) __U, 12997 _MM_FROUND_CUR_DIRECTION); 12998 } 12999 13000 extern __inline __m512d 13001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13002 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) 13003 { 13004 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 13005 (__v8df) __B, 13006 (__v8df) 13007 _mm512_setzero_pd (), 13008 (__mmask8) __U, 13009 _MM_FROUND_CUR_DIRECTION); 13010 } 13011 13012 extern __inline __m512 13013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13014 _mm512_min_ps (__m512 __A, __m512 __B) 13015 { 13016 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 13017 (__v16sf) __B, 13018 (__v16sf) 13019 _mm512_undefined_ps (), 13020 (__mmask16) -1, 13021 _MM_FROUND_CUR_DIRECTION); 13022 } 13023 13024 extern __inline __m512 13025 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 13026 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 13027 { 13028 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 13029 (__v16sf) __B, 13030 (__v16sf) __W, 13031 (__mmask16) __U, 13032 _MM_FROUND_CUR_DIRECTION); 13033 } 13034 13035 extern __inline __m512 13036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13037 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) 13038 { 13039 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 13040 (__v16sf) __B, 13041 (__v16sf) 13042 _mm512_setzero_ps (), 13043 (__mmask16) __U, 13044 _MM_FROUND_CUR_DIRECTION); 13045 } 13046 13047 extern __inline __m128d 13048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13049 _mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 13050 { 13051 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, 13052 (__v2df) __B, 13053 (__v2df) __W, 13054 (__mmask8) __U, 13055 _MM_FROUND_CUR_DIRECTION); 13056 } 13057 13058 extern __inline __m128d 13059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13060 _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B) 13061 { 13062 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, 13063 (__v2df) __B, 13064 (__v2df) 13065 _mm_setzero_pd (), 13066 (__mmask8) __U, 13067 _MM_FROUND_CUR_DIRECTION); 13068 } 13069 13070 extern __inline __m128 13071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13072 _mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 13073 { 13074 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, 13075 (__v4sf) __B, 13076 (__v4sf) __W, 13077 (__mmask8) __U, 13078 _MM_FROUND_CUR_DIRECTION); 13079 } 13080 13081 extern __inline __m128 13082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13083 _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B) 13084 { 13085 return (__m128) 
__builtin_ia32_minss_mask_round ((__v4sf) __A, 13086 (__v4sf) __B, 13087 (__v4sf) 13088 _mm_setzero_ps (), 13089 (__mmask8) __U, 13090 _MM_FROUND_CUR_DIRECTION); 13091 } 13092 13093 extern __inline __m512d 13094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13095 _mm512_scalef_pd (__m512d __A, __m512d __B) 13096 { 13097 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 13098 (__v8df) __B, 13099 (__v8df) 13100 _mm512_undefined_pd (), 13101 (__mmask8) -1, 13102 _MM_FROUND_CUR_DIRECTION); 13103 } 13104 13105 extern __inline __m512d 13106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13107 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 13108 { 13109 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 13110 (__v8df) __B, 13111 (__v8df) __W, 13112 (__mmask8) __U, 13113 _MM_FROUND_CUR_DIRECTION); 13114 } 13115 13116 extern __inline __m512d 13117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13118 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) 13119 { 13120 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 13121 (__v8df) __B, 13122 (__v8df) 13123 _mm512_setzero_pd (), 13124 (__mmask8) __U, 13125 _MM_FROUND_CUR_DIRECTION); 13126 } 13127 13128 extern __inline __m512 13129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13130 _mm512_scalef_ps (__m512 __A, __m512 __B) 13131 { 13132 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 13133 (__v16sf) __B, 13134 (__v16sf) 13135 _mm512_undefined_ps (), 13136 (__mmask16) -1, 13137 _MM_FROUND_CUR_DIRECTION); 13138 } 13139 13140 extern __inline __m512 13141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13142 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 13143 { 13144 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 13145 (__v16sf) __B, 13146 (__v16sf) __W, 13147 (__mmask16) __U, 13148 
_MM_FROUND_CUR_DIRECTION); 13149 } 13150 13151 extern __inline __m512 13152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13153 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) 13154 { 13155 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 13156 (__v16sf) __B, 13157 (__v16sf) 13158 _mm512_setzero_ps (), 13159 (__mmask16) __U, 13160 _MM_FROUND_CUR_DIRECTION); 13161 } 13162 13163 extern __inline __m128d 13164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13165 _mm_scalef_sd (__m128d __A, __m128d __B) 13166 { 13167 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, 13168 (__v2df) __B, 13169 (__v2df) 13170 _mm_setzero_pd (), 13171 (__mmask8) -1, 13172 _MM_FROUND_CUR_DIRECTION); 13173 } 13174 13175 extern __inline __m128 13176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13177 _mm_scalef_ss (__m128 __A, __m128 __B) 13178 { 13179 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, 13180 (__v4sf) __B, 13181 (__v4sf) 13182 _mm_setzero_ps (), 13183 (__mmask8) -1, 13184 _MM_FROUND_CUR_DIRECTION); 13185 } 13186 13187 extern __inline __m512d 13188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13189 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C) 13190 { 13191 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 13192 (__v8df) __B, 13193 (__v8df) __C, 13194 (__mmask8) -1, 13195 _MM_FROUND_CUR_DIRECTION); 13196 } 13197 13198 extern __inline __m512d 13199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13200 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 13201 { 13202 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 13203 (__v8df) __B, 13204 (__v8df) __C, 13205 (__mmask8) __U, 13206 _MM_FROUND_CUR_DIRECTION); 13207 } 13208 13209 extern __inline __m512d 13210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13211 _mm512_mask3_fmadd_pd 
(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 13212 { 13213 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 13214 (__v8df) __B, 13215 (__v8df) __C, 13216 (__mmask8) __U, 13217 _MM_FROUND_CUR_DIRECTION); 13218 } 13219 13220 extern __inline __m512d 13221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13222 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 13223 { 13224 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 13225 (__v8df) __B, 13226 (__v8df) __C, 13227 (__mmask8) __U, 13228 _MM_FROUND_CUR_DIRECTION); 13229 } 13230 13231 extern __inline __m512 13232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13233 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C) 13234 { 13235 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 13236 (__v16sf) __B, 13237 (__v16sf) __C, 13238 (__mmask16) -1, 13239 _MM_FROUND_CUR_DIRECTION); 13240 } 13241 13242 extern __inline __m512 13243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13244 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 13245 { 13246 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 13247 (__v16sf) __B, 13248 (__v16sf) __C, 13249 (__mmask16) __U, 13250 _MM_FROUND_CUR_DIRECTION); 13251 } 13252 13253 extern __inline __m512 13254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13255 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 13256 { 13257 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 13258 (__v16sf) __B, 13259 (__v16sf) __C, 13260 (__mmask16) __U, 13261 _MM_FROUND_CUR_DIRECTION); 13262 } 13263 13264 extern __inline __m512 13265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13266 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 13267 { 13268 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 13269 (__v16sf) __B, 
13270 (__v16sf) __C, 13271 (__mmask16) __U, 13272 _MM_FROUND_CUR_DIRECTION); 13273 } 13274 13275 extern __inline __m512d 13276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13277 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C) 13278 { 13279 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A, 13280 (__v8df) __B, 13281 (__v8df) __C, 13282 (__mmask8) -1, 13283 _MM_FROUND_CUR_DIRECTION); 13284 } 13285 13286 extern __inline __m512d 13287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13288 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 13289 { 13290 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A, 13291 (__v8df) __B, 13292 (__v8df) __C, 13293 (__mmask8) __U, 13294 _MM_FROUND_CUR_DIRECTION); 13295 } 13296 13297 extern __inline __m512d 13298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13299 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 13300 { 13301 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 13302 (__v8df) __B, 13303 (__v8df) __C, 13304 (__mmask8) __U, 13305 _MM_FROUND_CUR_DIRECTION); 13306 } 13307 13308 extern __inline __m512d 13309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13310 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 13311 { 13312 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A, 13313 (__v8df) __B, 13314 (__v8df) __C, 13315 (__mmask8) __U, 13316 _MM_FROUND_CUR_DIRECTION); 13317 } 13318 13319 extern __inline __m512 13320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13321 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C) 13322 { 13323 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A, 13324 (__v16sf) __B, 13325 (__v16sf) __C, 13326 (__mmask16) -1, 13327 _MM_FROUND_CUR_DIRECTION); 13328 } 13329 13330 extern __inline __m512 13331 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 13332 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 13333 { 13334 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A, 13335 (__v16sf) __B, 13336 (__v16sf) __C, 13337 (__mmask16) __U, 13338 _MM_FROUND_CUR_DIRECTION); 13339 } 13340 13341 extern __inline __m512 13342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13343 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 13344 { 13345 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 13346 (__v16sf) __B, 13347 (__v16sf) __C, 13348 (__mmask16) __U, 13349 _MM_FROUND_CUR_DIRECTION); 13350 } 13351 13352 extern __inline __m512 13353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13354 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 13355 { 13356 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A, 13357 (__v16sf) __B, 13358 (__v16sf) __C, 13359 (__mmask16) __U, 13360 _MM_FROUND_CUR_DIRECTION); 13361 } 13362 13363 extern __inline __m512d 13364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13365 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C) 13366 { 13367 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 13368 (__v8df) __B, 13369 (__v8df) __C, 13370 (__mmask8) -1, 13371 _MM_FROUND_CUR_DIRECTION); 13372 } 13373 13374 extern __inline __m512d 13375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13376 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 13377 { 13378 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 13379 (__v8df) __B, 13380 (__v8df) __C, 13381 (__mmask8) __U, 13382 _MM_FROUND_CUR_DIRECTION); 13383 } 13384 13385 extern __inline __m512d 13386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13387 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 13388 { 13389 return (__m512d) 
__builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 13390 (__v8df) __B, 13391 (__v8df) __C, 13392 (__mmask8) __U, 13393 _MM_FROUND_CUR_DIRECTION); 13394 } 13395 13396 extern __inline __m512d 13397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13398 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 13399 { 13400 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 13401 (__v8df) __B, 13402 (__v8df) __C, 13403 (__mmask8) __U, 13404 _MM_FROUND_CUR_DIRECTION); 13405 } 13406 13407 extern __inline __m512 13408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13409 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C) 13410 { 13411 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 13412 (__v16sf) __B, 13413 (__v16sf) __C, 13414 (__mmask16) -1, 13415 _MM_FROUND_CUR_DIRECTION); 13416 } 13417 13418 extern __inline __m512 13419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13420 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 13421 { 13422 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 13423 (__v16sf) __B, 13424 (__v16sf) __C, 13425 (__mmask16) __U, 13426 _MM_FROUND_CUR_DIRECTION); 13427 } 13428 13429 extern __inline __m512 13430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13431 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 13432 { 13433 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 13434 (__v16sf) __B, 13435 (__v16sf) __C, 13436 (__mmask16) __U, 13437 _MM_FROUND_CUR_DIRECTION); 13438 } 13439 13440 extern __inline __m512 13441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13442 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 13443 { 13444 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 13445 (__v16sf) __B, 13446 (__v16sf) __C, 13447 (__mmask16) __U, 13448 
_MM_FROUND_CUR_DIRECTION); 13449 } 13450 13451 extern __inline __m512d 13452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13453 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C) 13454 { 13455 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 13456 (__v8df) __B, 13457 -(__v8df) __C, 13458 (__mmask8) -1, 13459 _MM_FROUND_CUR_DIRECTION); 13460 } 13461 13462 extern __inline __m512d 13463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13464 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 13465 { 13466 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 13467 (__v8df) __B, 13468 -(__v8df) __C, 13469 (__mmask8) __U, 13470 _MM_FROUND_CUR_DIRECTION); 13471 } 13472 13473 extern __inline __m512d 13474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13475 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 13476 { 13477 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 13478 (__v8df) __B, 13479 (__v8df) __C, 13480 (__mmask8) __U, 13481 _MM_FROUND_CUR_DIRECTION); 13482 } 13483 13484 extern __inline __m512d 13485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13486 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 13487 { 13488 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 13489 (__v8df) __B, 13490 -(__v8df) __C, 13491 (__mmask8) __U, 13492 _MM_FROUND_CUR_DIRECTION); 13493 } 13494 13495 extern __inline __m512 13496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13497 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C) 13498 { 13499 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 13500 (__v16sf) __B, 13501 -(__v16sf) __C, 13502 (__mmask16) -1, 13503 _MM_FROUND_CUR_DIRECTION); 13504 } 13505 13506 extern __inline __m512 13507 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 13508 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 13509 { 13510 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 13511 (__v16sf) __B, 13512 -(__v16sf) __C, 13513 (__mmask16) __U, 13514 _MM_FROUND_CUR_DIRECTION); 13515 } 13516 13517 extern __inline __m512 13518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13519 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 13520 { 13521 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 13522 (__v16sf) __B, 13523 (__v16sf) __C, 13524 (__mmask16) __U, 13525 _MM_FROUND_CUR_DIRECTION); 13526 } 13527 13528 extern __inline __m512 13529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13530 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 13531 { 13532 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 13533 (__v16sf) __B, 13534 -(__v16sf) __C, 13535 (__mmask16) __U, 13536 _MM_FROUND_CUR_DIRECTION); 13537 } 13538 13539 extern __inline __m512d 13540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13541 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C) 13542 { 13543 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 13544 (__v8df) __B, 13545 (__v8df) __C, 13546 (__mmask8) -1, 13547 _MM_FROUND_CUR_DIRECTION); 13548 } 13549 13550 extern __inline __m512d 13551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13552 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 13553 { 13554 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 13555 (__v8df) __B, 13556 (__v8df) __C, 13557 (__mmask8) __U, 13558 _MM_FROUND_CUR_DIRECTION); 13559 } 13560 13561 extern __inline __m512d 13562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13563 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 13564 { 13565 return (__m512d) 
__builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A, 13566 (__v8df) __B, 13567 (__v8df) __C, 13568 (__mmask8) __U, 13569 _MM_FROUND_CUR_DIRECTION); 13570 } 13571 13572 extern __inline __m512d 13573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13574 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 13575 { 13576 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A, 13577 (__v8df) __B, 13578 (__v8df) __C, 13579 (__mmask8) __U, 13580 _MM_FROUND_CUR_DIRECTION); 13581 } 13582 13583 extern __inline __m512 13584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13585 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C) 13586 { 13587 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 13588 (__v16sf) __B, 13589 (__v16sf) __C, 13590 (__mmask16) -1, 13591 _MM_FROUND_CUR_DIRECTION); 13592 } 13593 13594 extern __inline __m512 13595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13596 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 13597 { 13598 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 13599 (__v16sf) __B, 13600 (__v16sf) __C, 13601 (__mmask16) __U, 13602 _MM_FROUND_CUR_DIRECTION); 13603 } 13604 13605 extern __inline __m512 13606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13607 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 13608 { 13609 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A, 13610 (__v16sf) __B, 13611 (__v16sf) __C, 13612 (__mmask16) __U, 13613 _MM_FROUND_CUR_DIRECTION); 13614 } 13615 13616 extern __inline __m512 13617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13618 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 13619 { 13620 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A, 13621 (__v16sf) __B, 13622 (__v16sf) __C, 13623 (__mmask16) __U, 13624 _MM_FROUND_CUR_DIRECTION); 
13625 } 13626 13627 extern __inline __m512d 13628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13629 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C) 13630 { 13631 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 13632 (__v8df) __B, 13633 (__v8df) __C, 13634 (__mmask8) -1, 13635 _MM_FROUND_CUR_DIRECTION); 13636 } 13637 13638 extern __inline __m512d 13639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13640 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 13641 { 13642 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 13643 (__v8df) __B, 13644 (__v8df) __C, 13645 (__mmask8) __U, 13646 _MM_FROUND_CUR_DIRECTION); 13647 } 13648 13649 extern __inline __m512d 13650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13651 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 13652 { 13653 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 13654 (__v8df) __B, 13655 (__v8df) __C, 13656 (__mmask8) __U, 13657 _MM_FROUND_CUR_DIRECTION); 13658 } 13659 13660 extern __inline __m512d 13661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13662 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 13663 { 13664 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A, 13665 (__v8df) __B, 13666 (__v8df) __C, 13667 (__mmask8) __U, 13668 _MM_FROUND_CUR_DIRECTION); 13669 } 13670 13671 extern __inline __m512 13672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13673 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C) 13674 { 13675 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 13676 (__v16sf) __B, 13677 (__v16sf) __C, 13678 (__mmask16) -1, 13679 _MM_FROUND_CUR_DIRECTION); 13680 } 13681 13682 extern __inline __m512 13683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13684 _mm512_mask_fnmsub_ps (__m512 __A, 
__mmask16 __U, __m512 __B, __m512 __C) 13685 { 13686 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 13687 (__v16sf) __B, 13688 (__v16sf) __C, 13689 (__mmask16) __U, 13690 _MM_FROUND_CUR_DIRECTION); 13691 } 13692 13693 extern __inline __m512 13694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13695 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 13696 { 13697 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 13698 (__v16sf) __B, 13699 (__v16sf) __C, 13700 (__mmask16) __U, 13701 _MM_FROUND_CUR_DIRECTION); 13702 } 13703 13704 extern __inline __m512 13705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13706 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 13707 { 13708 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A, 13709 (__v16sf) __B, 13710 (__v16sf) __C, 13711 (__mmask16) __U, 13712 _MM_FROUND_CUR_DIRECTION); 13713 } 13714 13715 extern __inline __m256i 13716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13717 _mm512_cvttpd_epi32 (__m512d __A) 13718 { 13719 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 13720 (__v8si) 13721 _mm256_undefined_si256 (), 13722 (__mmask8) -1, 13723 _MM_FROUND_CUR_DIRECTION); 13724 } 13725 13726 extern __inline __m256i 13727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13728 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 13729 { 13730 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 13731 (__v8si) __W, 13732 (__mmask8) __U, 13733 _MM_FROUND_CUR_DIRECTION); 13734 } 13735 13736 extern __inline __m256i 13737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13738 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) 13739 { 13740 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 13741 (__v8si) 13742 _mm256_setzero_si256 (), 13743 (__mmask8) __U, 13744 
_MM_FROUND_CUR_DIRECTION); 13745 } 13746 13747 extern __inline __m256i 13748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13749 _mm512_cvttpd_epu32 (__m512d __A) 13750 { 13751 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 13752 (__v8si) 13753 _mm256_undefined_si256 (), 13754 (__mmask8) -1, 13755 _MM_FROUND_CUR_DIRECTION); 13756 } 13757 13758 extern __inline __m256i 13759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13760 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 13761 { 13762 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 13763 (__v8si) __W, 13764 (__mmask8) __U, 13765 _MM_FROUND_CUR_DIRECTION); 13766 } 13767 13768 extern __inline __m256i 13769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13770 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) 13771 { 13772 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 13773 (__v8si) 13774 _mm256_setzero_si256 (), 13775 (__mmask8) __U, 13776 _MM_FROUND_CUR_DIRECTION); 13777 } 13778 13779 extern __inline __m256i 13780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13781 _mm512_cvtpd_epi32 (__m512d __A) 13782 { 13783 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 13784 (__v8si) 13785 _mm256_undefined_si256 (), 13786 (__mmask8) -1, 13787 _MM_FROUND_CUR_DIRECTION); 13788 } 13789 13790 extern __inline __m256i 13791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13792 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 13793 { 13794 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 13795 (__v8si) __W, 13796 (__mmask8) __U, 13797 _MM_FROUND_CUR_DIRECTION); 13798 } 13799 13800 extern __inline __m256i 13801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13802 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) 13803 { 13804 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) 
__A, 13805 (__v8si) 13806 _mm256_setzero_si256 (), 13807 (__mmask8) __U, 13808 _MM_FROUND_CUR_DIRECTION); 13809 } 13810 13811 extern __inline __m256i 13812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13813 _mm512_cvtpd_epu32 (__m512d __A) 13814 { 13815 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 13816 (__v8si) 13817 _mm256_undefined_si256 (), 13818 (__mmask8) -1, 13819 _MM_FROUND_CUR_DIRECTION); 13820 } 13821 13822 extern __inline __m256i 13823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13824 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 13825 { 13826 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 13827 (__v8si) __W, 13828 (__mmask8) __U, 13829 _MM_FROUND_CUR_DIRECTION); 13830 } 13831 13832 extern __inline __m256i 13833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13834 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) 13835 { 13836 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 13837 (__v8si) 13838 _mm256_setzero_si256 (), 13839 (__mmask8) __U, 13840 _MM_FROUND_CUR_DIRECTION); 13841 } 13842 13843 extern __inline __m512i 13844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13845 _mm512_cvttps_epi32 (__m512 __A) 13846 { 13847 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 13848 (__v16si) 13849 _mm512_undefined_epi32 (), 13850 (__mmask16) -1, 13851 _MM_FROUND_CUR_DIRECTION); 13852 } 13853 13854 extern __inline __m512i 13855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13856 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 13857 { 13858 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 13859 (__v16si) __W, 13860 (__mmask16) __U, 13861 _MM_FROUND_CUR_DIRECTION); 13862 } 13863 13864 extern __inline __m512i 13865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13866 _mm512_maskz_cvttps_epi32 (__mmask16 __U, 
__m512 __A) 13867 { 13868 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 13869 (__v16si) 13870 _mm512_setzero_si512 (), 13871 (__mmask16) __U, 13872 _MM_FROUND_CUR_DIRECTION); 13873 } 13874 13875 extern __inline __m512i 13876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13877 _mm512_cvttps_epu32 (__m512 __A) 13878 { 13879 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 13880 (__v16si) 13881 _mm512_undefined_epi32 (), 13882 (__mmask16) -1, 13883 _MM_FROUND_CUR_DIRECTION); 13884 } 13885 13886 extern __inline __m512i 13887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13888 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 13889 { 13890 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 13891 (__v16si) __W, 13892 (__mmask16) __U, 13893 _MM_FROUND_CUR_DIRECTION); 13894 } 13895 13896 extern __inline __m512i 13897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13898 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) 13899 { 13900 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 13901 (__v16si) 13902 _mm512_setzero_si512 (), 13903 (__mmask16) __U, 13904 _MM_FROUND_CUR_DIRECTION); 13905 } 13906 13907 extern __inline __m512i 13908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13909 _mm512_cvtps_epi32 (__m512 __A) 13910 { 13911 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 13912 (__v16si) 13913 _mm512_undefined_epi32 (), 13914 (__mmask16) -1, 13915 _MM_FROUND_CUR_DIRECTION); 13916 } 13917 13918 extern __inline __m512i 13919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13920 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 13921 { 13922 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 13923 (__v16si) __W, 13924 (__mmask16) __U, 13925 _MM_FROUND_CUR_DIRECTION); 13926 } 13927 13928 extern __inline __m512i 13929 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 13930 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) 13931 { 13932 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 13933 (__v16si) 13934 _mm512_setzero_si512 (), 13935 (__mmask16) __U, 13936 _MM_FROUND_CUR_DIRECTION); 13937 } 13938 13939 extern __inline __m512i 13940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13941 _mm512_cvtps_epu32 (__m512 __A) 13942 { 13943 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 13944 (__v16si) 13945 _mm512_undefined_epi32 (), 13946 (__mmask16) -1, 13947 _MM_FROUND_CUR_DIRECTION); 13948 } 13949 13950 extern __inline __m512i 13951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13952 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 13953 { 13954 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 13955 (__v16si) __W, 13956 (__mmask16) __U, 13957 _MM_FROUND_CUR_DIRECTION); 13958 } 13959 13960 extern __inline __m512i 13961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13962 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) 13963 { 13964 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 13965 (__v16si) 13966 _mm512_setzero_si512 (), 13967 (__mmask16) __U, 13968 _MM_FROUND_CUR_DIRECTION); 13969 } 13970 13971 extern __inline double 13972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13973 _mm512_cvtsd_f64 (__m512d __A) 13974 { 13975 return __A[0]; 13976 } 13977 13978 extern __inline float 13979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13980 _mm512_cvtss_f32 (__m512 __A) 13981 { 13982 return __A[0]; 13983 } 13984 13985 #ifdef __x86_64__ 13986 extern __inline __m128 13987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13988 _mm_cvtu64_ss (__m128 __A, unsigned long long __B) 13989 { 13990 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, 13991 
_MM_FROUND_CUR_DIRECTION); 13992 } 13993 13994 extern __inline __m128d 13995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13996 _mm_cvtu64_sd (__m128d __A, unsigned long long __B) 13997 { 13998 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, 13999 _MM_FROUND_CUR_DIRECTION); 14000 } 14001 #endif 14002 14003 extern __inline __m128 14004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14005 _mm_cvtu32_ss (__m128 __A, unsigned __B) 14006 { 14007 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, 14008 _MM_FROUND_CUR_DIRECTION); 14009 } 14010 14011 extern __inline __m512 14012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14013 _mm512_cvtepi32_ps (__m512i __A) 14014 { 14015 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 14016 (__v16sf) 14017 _mm512_undefined_ps (), 14018 (__mmask16) -1, 14019 _MM_FROUND_CUR_DIRECTION); 14020 } 14021 14022 extern __inline __m512 14023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14024 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) 14025 { 14026 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 14027 (__v16sf) __W, 14028 (__mmask16) __U, 14029 _MM_FROUND_CUR_DIRECTION); 14030 } 14031 14032 extern __inline __m512 14033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14034 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) 14035 { 14036 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 14037 (__v16sf) 14038 _mm512_setzero_ps (), 14039 (__mmask16) __U, 14040 _MM_FROUND_CUR_DIRECTION); 14041 } 14042 14043 extern __inline __m512 14044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14045 _mm512_cvtepu32_ps (__m512i __A) 14046 { 14047 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 14048 (__v16sf) 14049 _mm512_undefined_ps (), 14050 (__mmask16) -1, 14051 _MM_FROUND_CUR_DIRECTION); 14052 } 14053 14054 extern 
__inline __m512 14055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14056 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) 14057 { 14058 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 14059 (__v16sf) __W, 14060 (__mmask16) __U, 14061 _MM_FROUND_CUR_DIRECTION); 14062 } 14063 14064 extern __inline __m512 14065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14066 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) 14067 { 14068 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 14069 (__v16sf) 14070 _mm512_setzero_ps (), 14071 (__mmask16) __U, 14072 _MM_FROUND_CUR_DIRECTION); 14073 } 14074 14075 #ifdef __OPTIMIZE__ 14076 extern __inline __m512d 14077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14078 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm) 14079 { 14080 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 14081 (__v8df) __B, 14082 (__v8di) __C, 14083 __imm, 14084 (__mmask8) -1, 14085 _MM_FROUND_CUR_DIRECTION); 14086 } 14087 14088 extern __inline __m512d 14089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14090 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B, 14091 __m512i __C, const int __imm) 14092 { 14093 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 14094 (__v8df) __B, 14095 (__v8di) __C, 14096 __imm, 14097 (__mmask8) __U, 14098 _MM_FROUND_CUR_DIRECTION); 14099 } 14100 14101 extern __inline __m512d 14102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14103 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B, 14104 __m512i __C, const int __imm) 14105 { 14106 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A, 14107 (__v8df) __B, 14108 (__v8di) __C, 14109 __imm, 14110 (__mmask8) __U, 14111 _MM_FROUND_CUR_DIRECTION); 14112 } 14113 14114 extern __inline __m512 14115 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 14116 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm) 14117 { 14118 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 14119 (__v16sf) __B, 14120 (__v16si) __C, 14121 __imm, 14122 (__mmask16) -1, 14123 _MM_FROUND_CUR_DIRECTION); 14124 } 14125 14126 extern __inline __m512 14127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14128 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B, 14129 __m512i __C, const int __imm) 14130 { 14131 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 14132 (__v16sf) __B, 14133 (__v16si) __C, 14134 __imm, 14135 (__mmask16) __U, 14136 _MM_FROUND_CUR_DIRECTION); 14137 } 14138 14139 extern __inline __m512 14140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14141 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B, 14142 __m512i __C, const int __imm) 14143 { 14144 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, 14145 (__v16sf) __B, 14146 (__v16si) __C, 14147 __imm, 14148 (__mmask16) __U, 14149 _MM_FROUND_CUR_DIRECTION); 14150 } 14151 14152 extern __inline __m128d 14153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14154 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm) 14155 { 14156 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 14157 (__v2df) __B, 14158 (__v2di) __C, __imm, 14159 (__mmask8) -1, 14160 _MM_FROUND_CUR_DIRECTION); 14161 } 14162 14163 extern __inline __m128d 14164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14165 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B, 14166 __m128i __C, const int __imm) 14167 { 14168 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 14169 (__v2df) __B, 14170 (__v2di) __C, __imm, 14171 (__mmask8) __U, 14172 _MM_FROUND_CUR_DIRECTION); 14173 } 14174 14175 extern __inline __m128d 14176 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 14177 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B, 14178 __m128i __C, const int __imm) 14179 { 14180 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, 14181 (__v2df) __B, 14182 (__v2di) __C, 14183 __imm, 14184 (__mmask8) __U, 14185 _MM_FROUND_CUR_DIRECTION); 14186 } 14187 14188 extern __inline __m128 14189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14190 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm) 14191 { 14192 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 14193 (__v4sf) __B, 14194 (__v4si) __C, __imm, 14195 (__mmask8) -1, 14196 _MM_FROUND_CUR_DIRECTION); 14197 } 14198 14199 extern __inline __m128 14200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14201 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B, 14202 __m128i __C, const int __imm) 14203 { 14204 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 14205 (__v4sf) __B, 14206 (__v4si) __C, __imm, 14207 (__mmask8) __U, 14208 _MM_FROUND_CUR_DIRECTION); 14209 } 14210 14211 extern __inline __m128 14212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14213 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B, 14214 __m128i __C, const int __imm) 14215 { 14216 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, 14217 (__v4sf) __B, 14218 (__v4si) __C, __imm, 14219 (__mmask8) __U, 14220 _MM_FROUND_CUR_DIRECTION); 14221 } 14222 #else 14223 #define _mm512_fixupimm_pd(X, Y, Z, C) \ 14224 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 14225 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 14226 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 14227 14228 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \ 14229 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 14230 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 14231 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 14232 
14233 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \ 14234 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \ 14235 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 14236 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 14237 14238 #define _mm512_fixupimm_ps(X, Y, Z, C) \ 14239 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 14240 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 14241 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION)) 14242 14243 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \ 14244 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 14245 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 14246 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 14247 14248 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \ 14249 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ 14250 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 14251 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 14252 14253 #define _mm_fixupimm_sd(X, Y, Z, C) \ 14254 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 14255 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 14256 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 14257 14258 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \ 14259 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 14260 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 14261 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 14262 14263 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \ 14264 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ 14265 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 14266 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 14267 14268 #define _mm_fixupimm_ss(X, Y, Z, C) \ 14269 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 14270 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 14271 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 14272 14273 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \ 14274 
((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 14275 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 14276 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 14277 14278 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \ 14279 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ 14280 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 14281 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 14282 #endif 14283 14284 #ifdef __x86_64__ 14285 extern __inline unsigned long long 14286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14287 _mm_cvtss_u64 (__m128 __A) 14288 { 14289 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) 14290 __A, 14291 _MM_FROUND_CUR_DIRECTION); 14292 } 14293 14294 extern __inline unsigned long long 14295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14296 _mm_cvttss_u64 (__m128 __A) 14297 { 14298 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) 14299 __A, 14300 _MM_FROUND_CUR_DIRECTION); 14301 } 14302 14303 extern __inline long long 14304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14305 _mm_cvttss_i64 (__m128 __A) 14306 { 14307 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, 14308 _MM_FROUND_CUR_DIRECTION); 14309 } 14310 #endif /* __x86_64__ */ 14311 14312 extern __inline unsigned 14313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14314 _mm_cvtss_u32 (__m128 __A) 14315 { 14316 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, 14317 _MM_FROUND_CUR_DIRECTION); 14318 } 14319 14320 extern __inline unsigned 14321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14322 _mm_cvttss_u32 (__m128 __A) 14323 { 14324 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, 14325 _MM_FROUND_CUR_DIRECTION); 14326 } 14327 14328 extern __inline int 14329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14330 _mm_cvttss_i32 (__m128 __A) 14331 { 14332 return 
(int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, 14333 _MM_FROUND_CUR_DIRECTION); 14334 } 14335 14336 #ifdef __x86_64__ 14337 extern __inline unsigned long long 14338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14339 _mm_cvtsd_u64 (__m128d __A) 14340 { 14341 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) 14342 __A, 14343 _MM_FROUND_CUR_DIRECTION); 14344 } 14345 14346 extern __inline unsigned long long 14347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14348 _mm_cvttsd_u64 (__m128d __A) 14349 { 14350 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) 14351 __A, 14352 _MM_FROUND_CUR_DIRECTION); 14353 } 14354 14355 extern __inline long long 14356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14357 _mm_cvttsd_i64 (__m128d __A) 14358 { 14359 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, 14360 _MM_FROUND_CUR_DIRECTION); 14361 } 14362 #endif /* __x86_64__ */ 14363 14364 extern __inline unsigned 14365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14366 _mm_cvtsd_u32 (__m128d __A) 14367 { 14368 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, 14369 _MM_FROUND_CUR_DIRECTION); 14370 } 14371 14372 extern __inline unsigned 14373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14374 _mm_cvttsd_u32 (__m128d __A) 14375 { 14376 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, 14377 _MM_FROUND_CUR_DIRECTION); 14378 } 14379 14380 extern __inline int 14381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14382 _mm_cvttsd_i32 (__m128d __A) 14383 { 14384 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, 14385 _MM_FROUND_CUR_DIRECTION); 14386 } 14387 14388 extern __inline __m512d 14389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14390 _mm512_cvtps_pd (__m256 __A) 14391 { 14392 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 14393 (__v8df) 
14394 _mm512_undefined_pd (), 14395 (__mmask8) -1, 14396 _MM_FROUND_CUR_DIRECTION); 14397 } 14398 14399 extern __inline __m512d 14400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14401 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 14402 { 14403 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 14404 (__v8df) __W, 14405 (__mmask8) __U, 14406 _MM_FROUND_CUR_DIRECTION); 14407 } 14408 14409 extern __inline __m512d 14410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14411 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 14412 { 14413 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 14414 (__v8df) 14415 _mm512_setzero_pd (), 14416 (__mmask8) __U, 14417 _MM_FROUND_CUR_DIRECTION); 14418 } 14419 14420 extern __inline __m512 14421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14422 _mm512_cvtph_ps (__m256i __A) 14423 { 14424 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 14425 (__v16sf) 14426 _mm512_undefined_ps (), 14427 (__mmask16) -1, 14428 _MM_FROUND_CUR_DIRECTION); 14429 } 14430 14431 extern __inline __m512 14432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14433 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) 14434 { 14435 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 14436 (__v16sf) __W, 14437 (__mmask16) __U, 14438 _MM_FROUND_CUR_DIRECTION); 14439 } 14440 14441 extern __inline __m512 14442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14443 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) 14444 { 14445 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 14446 (__v16sf) 14447 _mm512_setzero_ps (), 14448 (__mmask16) __U, 14449 _MM_FROUND_CUR_DIRECTION); 14450 } 14451 14452 extern __inline __m256 14453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14454 _mm512_cvtpd_ps (__m512d __A) 14455 { 14456 return (__m256) 
__builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 14457 (__v8sf) 14458 _mm256_undefined_ps (), 14459 (__mmask8) -1, 14460 _MM_FROUND_CUR_DIRECTION); 14461 } 14462 14463 extern __inline __m256 14464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14465 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) 14466 { 14467 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 14468 (__v8sf) __W, 14469 (__mmask8) __U, 14470 _MM_FROUND_CUR_DIRECTION); 14471 } 14472 14473 extern __inline __m256 14474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14475 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) 14476 { 14477 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 14478 (__v8sf) 14479 _mm256_setzero_ps (), 14480 (__mmask8) __U, 14481 _MM_FROUND_CUR_DIRECTION); 14482 } 14483 14484 #ifdef __OPTIMIZE__ 14485 extern __inline __m512 14486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14487 _mm512_getexp_ps (__m512 __A) 14488 { 14489 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 14490 (__v16sf) 14491 _mm512_undefined_ps (), 14492 (__mmask16) -1, 14493 _MM_FROUND_CUR_DIRECTION); 14494 } 14495 14496 extern __inline __m512 14497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14498 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) 14499 { 14500 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 14501 (__v16sf) __W, 14502 (__mmask16) __U, 14503 _MM_FROUND_CUR_DIRECTION); 14504 } 14505 14506 extern __inline __m512 14507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14508 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) 14509 { 14510 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 14511 (__v16sf) 14512 _mm512_setzero_ps (), 14513 (__mmask16) __U, 14514 _MM_FROUND_CUR_DIRECTION); 14515 } 14516 14517 extern __inline __m512d 14518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
14519 _mm512_getexp_pd (__m512d __A) 14520 { 14521 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 14522 (__v8df) 14523 _mm512_undefined_pd (), 14524 (__mmask8) -1, 14525 _MM_FROUND_CUR_DIRECTION); 14526 } 14527 14528 extern __inline __m512d 14529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14530 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) 14531 { 14532 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 14533 (__v8df) __W, 14534 (__mmask8) __U, 14535 _MM_FROUND_CUR_DIRECTION); 14536 } 14537 14538 extern __inline __m512d 14539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14540 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) 14541 { 14542 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 14543 (__v8df) 14544 _mm512_setzero_pd (), 14545 (__mmask8) __U, 14546 _MM_FROUND_CUR_DIRECTION); 14547 } 14548 14549 extern __inline __m128 14550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14551 _mm_getexp_ss (__m128 __A, __m128 __B) 14552 { 14553 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A, 14554 (__v4sf) __B, 14555 _MM_FROUND_CUR_DIRECTION); 14556 } 14557 14558 extern __inline __m128 14559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14560 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 14561 { 14562 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A, 14563 (__v4sf) __B, 14564 (__v4sf) __W, 14565 (__mmask8) __U, 14566 _MM_FROUND_CUR_DIRECTION); 14567 } 14568 14569 extern __inline __m128 14570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14571 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) 14572 { 14573 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A, 14574 (__v4sf) __B, 14575 (__v4sf) 14576 _mm_setzero_ps (), 14577 (__mmask8) __U, 14578 _MM_FROUND_CUR_DIRECTION); 14579 } 14580 14581 extern __inline __m128d 14582 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14583 _mm_getexp_sd (__m128d __A, __m128d __B) 14584 { 14585 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A, 14586 (__v2df) __B, 14587 _MM_FROUND_CUR_DIRECTION); 14588 } 14589 14590 extern __inline __m128d 14591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14592 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 14593 { 14594 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A, 14595 (__v2df) __B, 14596 (__v2df) __W, 14597 (__mmask8) __U, 14598 _MM_FROUND_CUR_DIRECTION); 14599 } 14600 14601 extern __inline __m128d 14602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14603 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) 14604 { 14605 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A, 14606 (__v2df) __B, 14607 (__v2df) 14608 _mm_setzero_pd (), 14609 (__mmask8) __U, 14610 _MM_FROUND_CUR_DIRECTION); 14611 } 14612 14613 extern __inline __m512d 14614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14615 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, 14616 _MM_MANTISSA_SIGN_ENUM __C) 14617 { 14618 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 14619 (__C << 2) | __B, 14620 _mm512_undefined_pd (), 14621 (__mmask8) -1, 14622 _MM_FROUND_CUR_DIRECTION); 14623 } 14624 14625 extern __inline __m512d 14626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14627 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A, 14628 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 14629 { 14630 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 14631 (__C << 2) | __B, 14632 (__v8df) __W, __U, 14633 _MM_FROUND_CUR_DIRECTION); 14634 } 14635 14636 extern __inline __m512d 14637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14638 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A, 14639 
_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 14640 { 14641 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 14642 (__C << 2) | __B, 14643 (__v8df) 14644 _mm512_setzero_pd (), 14645 __U, 14646 _MM_FROUND_CUR_DIRECTION); 14647 } 14648 14649 extern __inline __m512 14650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14651 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, 14652 _MM_MANTISSA_SIGN_ENUM __C) 14653 { 14654 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 14655 (__C << 2) | __B, 14656 _mm512_undefined_ps (), 14657 (__mmask16) -1, 14658 _MM_FROUND_CUR_DIRECTION); 14659 } 14660 14661 extern __inline __m512 14662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14663 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A, 14664 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 14665 { 14666 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 14667 (__C << 2) | __B, 14668 (__v16sf) __W, __U, 14669 _MM_FROUND_CUR_DIRECTION); 14670 } 14671 14672 extern __inline __m512 14673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14674 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A, 14675 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 14676 { 14677 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 14678 (__C << 2) | __B, 14679 (__v16sf) 14680 _mm512_setzero_ps (), 14681 __U, 14682 _MM_FROUND_CUR_DIRECTION); 14683 } 14684 14685 extern __inline __m128d 14686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14687 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C, 14688 _MM_MANTISSA_SIGN_ENUM __D) 14689 { 14690 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A, 14691 (__v2df) __B, 14692 (__D << 2) | __C, 14693 _MM_FROUND_CUR_DIRECTION); 14694 } 14695 14696 extern __inline __m128d 14697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14698 
_mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 14699 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D) 14700 { 14701 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A, 14702 (__v2df) __B, 14703 (__D << 2) | __C, 14704 (__v2df) __W, 14705 __U, 14706 _MM_FROUND_CUR_DIRECTION); 14707 } 14708 14709 extern __inline __m128d 14710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14711 _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B, 14712 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D) 14713 { 14714 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A, 14715 (__v2df) __B, 14716 (__D << 2) | __C, 14717 (__v2df) 14718 _mm_setzero_pd(), 14719 __U, 14720 _MM_FROUND_CUR_DIRECTION); 14721 } 14722 14723 extern __inline __m128 14724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14725 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C, 14726 _MM_MANTISSA_SIGN_ENUM __D) 14727 { 14728 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A, 14729 (__v4sf) __B, 14730 (__D << 2) | __C, 14731 _MM_FROUND_CUR_DIRECTION); 14732 } 14733 14734 extern __inline __m128 14735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14736 _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 14737 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D) 14738 { 14739 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A, 14740 (__v4sf) __B, 14741 (__D << 2) | __C, 14742 (__v4sf) __W, 14743 __U, 14744 _MM_FROUND_CUR_DIRECTION); 14745 } 14746 14747 extern __inline __m128 14748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14749 _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B, 14750 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D) 14751 { 14752 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A, 14753 (__v4sf) __B, 14754 (__D << 2) | __C, 14755 (__v4sf) 14756 
_mm_setzero_ps(), 14757 __U, 14758 _MM_FROUND_CUR_DIRECTION); 14759 } 14760 14761 #else 14762 #define _mm512_getmant_pd(X, B, C) \ 14763 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 14764 (int)(((C)<<2) | (B)), \ 14765 (__v8df)_mm512_undefined_pd(), \ 14766 (__mmask8)-1,\ 14767 _MM_FROUND_CUR_DIRECTION)) 14768 14769 #define _mm512_mask_getmant_pd(W, U, X, B, C) \ 14770 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 14771 (int)(((C)<<2) | (B)), \ 14772 (__v8df)(__m512d)(W), \ 14773 (__mmask8)(U),\ 14774 _MM_FROUND_CUR_DIRECTION)) 14775 14776 #define _mm512_maskz_getmant_pd(U, X, B, C) \ 14777 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 14778 (int)(((C)<<2) | (B)), \ 14779 (__v8df)_mm512_setzero_pd(), \ 14780 (__mmask8)(U),\ 14781 _MM_FROUND_CUR_DIRECTION)) 14782 #define _mm512_getmant_ps(X, B, C) \ 14783 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 14784 (int)(((C)<<2) | (B)), \ 14785 (__v16sf)_mm512_undefined_ps(), \ 14786 (__mmask16)-1,\ 14787 _MM_FROUND_CUR_DIRECTION)) 14788 14789 #define _mm512_mask_getmant_ps(W, U, X, B, C) \ 14790 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 14791 (int)(((C)<<2) | (B)), \ 14792 (__v16sf)(__m512)(W), \ 14793 (__mmask16)(U),\ 14794 _MM_FROUND_CUR_DIRECTION)) 14795 14796 #define _mm512_maskz_getmant_ps(U, X, B, C) \ 14797 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 14798 (int)(((C)<<2) | (B)), \ 14799 (__v16sf)_mm512_setzero_ps(), \ 14800 (__mmask16)(U),\ 14801 _MM_FROUND_CUR_DIRECTION)) 14802 #define _mm_getmant_sd(X, Y, C, D) \ 14803 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \ 14804 (__v2df)(__m128d)(Y), \ 14805 (int)(((D)<<2) | (C)), \ 14806 _MM_FROUND_CUR_DIRECTION)) 14807 14808 #define _mm_mask_getmant_sd(W, U, X, Y, C, D) \ 14809 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \ 14810 (__v2df)(__m128d)(Y), \ 14811 (int)(((D)<<2) | (C)), \ 14812 
(__v2df)(__m128d)(W), \ 14813 (__mmask8)(U),\ 14814 _MM_FROUND_CUR_DIRECTION)) 14815 14816 #define _mm_maskz_getmant_sd(U, X, Y, C, D) \ 14817 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \ 14818 (__v2df)(__m128d)(Y), \ 14819 (int)(((D)<<2) | (C)), \ 14820 (__v2df)_mm_setzero_pd(), \ 14821 (__mmask8)(U),\ 14822 _MM_FROUND_CUR_DIRECTION)) 14823 14824 #define _mm_getmant_ss(X, Y, C, D) \ 14825 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \ 14826 (__v4sf)(__m128)(Y), \ 14827 (int)(((D)<<2) | (C)), \ 14828 _MM_FROUND_CUR_DIRECTION)) 14829 14830 #define _mm_mask_getmant_ss(W, U, X, Y, C, D) \ 14831 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \ 14832 (__v4sf)(__m128)(Y), \ 14833 (int)(((D)<<2) | (C)), \ 14834 (__v4sf)(__m128)(W), \ 14835 (__mmask8)(U),\ 14836 _MM_FROUND_CUR_DIRECTION)) 14837 14838 #define _mm_maskz_getmant_ss(U, X, Y, C, D) \ 14839 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \ 14840 (__v4sf)(__m128)(Y), \ 14841 (int)(((D)<<2) | (C)), \ 14842 (__v4sf)_mm_setzero_ps(), \ 14843 (__mmask8)(U),\ 14844 _MM_FROUND_CUR_DIRECTION)) 14845 14846 #define _mm_getexp_ss(A, B) \ 14847 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 14848 _MM_FROUND_CUR_DIRECTION)) 14849 14850 #define _mm_mask_getexp_ss(W, U, A, B) \ 14851 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\ 14852 _MM_FROUND_CUR_DIRECTION) 14853 14854 #define _mm_maskz_getexp_ss(U, A, B) \ 14855 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\ 14856 _MM_FROUND_CUR_DIRECTION) 14857 14858 #define _mm_getexp_sd(A, B) \ 14859 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\ 14860 _MM_FROUND_CUR_DIRECTION)) 14861 14862 #define _mm_mask_getexp_sd(W, U, A, B) \ 14863 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\ 14864 _MM_FROUND_CUR_DIRECTION) 14865 14866 #define _mm_maskz_getexp_sd(U, A, B) \ 14867 
(__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\ 14868 _MM_FROUND_CUR_DIRECTION) 14869 14870 #define _mm512_getexp_ps(A) \ 14871 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 14872 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)) 14873 14874 #define _mm512_mask_getexp_ps(W, U, A) \ 14875 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 14876 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 14877 14878 #define _mm512_maskz_getexp_ps(U, A) \ 14879 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 14880 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 14881 14882 #define _mm512_getexp_pd(A) \ 14883 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 14884 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) 14885 14886 #define _mm512_mask_getexp_pd(W, U, A) \ 14887 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 14888 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 14889 14890 #define _mm512_maskz_getexp_pd(U, A) \ 14891 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 14892 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 14893 #endif 14894 14895 #ifdef __OPTIMIZE__ 14896 extern __inline __m512 14897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14898 _mm512_roundscale_ps (__m512 __A, const int __imm) 14899 { 14900 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, 14901 (__v16sf) 14902 _mm512_undefined_ps (), 14903 -1, 14904 _MM_FROUND_CUR_DIRECTION); 14905 } 14906 14907 extern __inline __m512 14908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14909 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C, 14910 const int __imm) 14911 { 14912 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, 14913 (__v16sf) __A, 14914 
						  (__mmask16) __B,
						  _MM_FROUND_CUR_DIRECTION);
}

/* vrndscaleps/vrndscalepd: round each element to the precision selected
   by __imm (low 4 bits pick fixed-point fraction bits, next bits the
   rounding mode override — see the ISA reference), using the current
   MXCSR rounding direction for the final conversion.  The _mask_ forms
   merge from the first vector operand under the write-mask; the _maskz_
   forms zero masked-off lanes.  */

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
						  __imm,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __A,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_pd (__m512d __A, const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
						   (__v8df)
						   _mm512_undefined_pd (),
						   -1,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
			   const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
						   (__v8df) __A,
						   (__mmask8) __B,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
						   __imm,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __A,
						   _MM_FROUND_CUR_DIRECTION);
}

/* Scalar forms: round element 0 of the second operand, pass the upper
   elements of the first operand through unchanged.  */

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
{
  return (__m128)
    __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
					  (__v4sf) __B, __imm,
					  (__v4sf)
					  _mm_setzero_ps (),
					  (__mmask8) -1,
					  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
			const int __imm)
{
  return (__m128)
    __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
					  (__v4sf) __D, __imm,
					  (__v4sf) __A,
					  (__mmask8) __B,
					  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
			 const int __imm)
{
  return (__m128)
    __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
					  (__v4sf) __C, __imm,
					  (__v4sf)
					  _mm_setzero_ps (),
					  (__mmask8) __A,
					  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
{
  return (__m128d)
    __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
					  (__v2df) __B, __imm,
					  (__v2df)
					  _mm_setzero_pd (),
					  (__mmask8) -1,
					  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
			const int __imm)
{
  return (__m128d)
    __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
					  (__v2df) __D, __imm,
					  (__v2df) __A,
					  (__mmask8) __B,
					  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
			 const int __imm)
{
  return (__m128d)
    __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
					  (__v2df) __C, __imm,
					  (__v2df)
					  _mm_setzero_pd (),
					  (__mmask8) __A,
					  _MM_FROUND_CUR_DIRECTION);
}

#else
/* Without __OPTIMIZE__ the immediate argument cannot be proven constant
   through an inline function, so provide macro forms instead; these must
   expand to exactly the same builtin calls as the inline versions.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
    (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
					    (int)(D),			\
					    (__v16sf)(__m512)(A),	\
					    (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
					    (int)(C),			\
					    (__v16sf)_mm512_setzero_ps(),\
					    (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
    (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
					     (int)(D),			\
					     (__v8df)(__m512d)(A),	\
					     (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
					     (int)(C),			\
					     (__v8df)_mm512_setzero_pd(),\
					     (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_ss(A, B, I) \
  ((__m128)								\
   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),		\
					 (__v4sf) (__m128) (B),		\
					 (int) (I),			\
					 (__v4sf) _mm_setzero_ps (),	\
					 (__mmask8) (-1),		\
					 _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_roundscale_ss(A, U, B, C, I) \
  ((__m128)								\
   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B),		\
					 (__v4sf) (__m128) (C),		\
					 (int) (I),			\
					 (__v4sf) (__m128) (A),		\
					 (__mmask8) (U),		\
					 _MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)								\
   __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A),		\
					 (__v4sf) (__m128) (B),		\
					 (int) (I),			\
					 (__v4sf) _mm_setzero_ps (),	\
					 (__mmask8) (U),		\
					 _MM_FROUND_CUR_DIRECTION))
__builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \ 15091 (__v4sf) (__m128) (B), \ 15092 (int) (I), \ 15093 (__v4sf) _mm_setzero_ps (), \ 15094 (__mmask8) (U), \ 15095 _MM_FROUND_CUR_DIRECTION)) 15096 #define _mm_roundscale_sd(A, B, I) \ 15097 ((__m128d) \ 15098 __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \ 15099 (__v2df) (__m128d) (B), \ 15100 (int) (I), \ 15101 (__v2df) _mm_setzero_pd (), \ 15102 (__mmask8) (-1), \ 15103 _MM_FROUND_CUR_DIRECTION)) 15104 #define _mm_mask_roundscale_sd(A, U, B, C, I) \ 15105 ((__m128d) \ 15106 __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \ 15107 (__v2df) (__m128d) (C), \ 15108 (int) (I), \ 15109 (__v2df) (__m128d) (A), \ 15110 (__mmask8) (U), \ 15111 _MM_FROUND_CUR_DIRECTION)) 15112 #define _mm_maskz_roundscale_sd(U, A, B, I) \ 15113 ((__m128d) \ 15114 __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \ 15115 (__v2df) (__m128d) (B), \ 15116 (int) (I), \ 15117 (__v2df) _mm_setzero_pd (), \ 15118 (__mmask8) (U), \ 15119 _MM_FROUND_CUR_DIRECTION)) 15120 #endif 15121 15122 #ifdef __OPTIMIZE__ 15123 extern __inline __mmask8 15124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15125 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P) 15126 { 15127 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15128 (__v8df) __Y, __P, 15129 (__mmask8) -1, 15130 _MM_FROUND_CUR_DIRECTION); 15131 } 15132 15133 extern __inline __mmask16 15134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15135 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P) 15136 { 15137 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15138 (__v16sf) __Y, __P, 15139 (__mmask16) -1, 15140 _MM_FROUND_CUR_DIRECTION); 15141 } 15142 15143 extern __inline __mmask16 15144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15145 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P) 15146 { 15147 return 
(__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15148 (__v16sf) __Y, __P, 15149 (__mmask16) __U, 15150 _MM_FROUND_CUR_DIRECTION); 15151 } 15152 15153 extern __inline __mmask8 15154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15155 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P) 15156 { 15157 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15158 (__v8df) __Y, __P, 15159 (__mmask8) __U, 15160 _MM_FROUND_CUR_DIRECTION); 15161 } 15162 15163 extern __inline __mmask8 15164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15165 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) 15166 { 15167 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 15168 (__v2df) __Y, __P, 15169 (__mmask8) -1, 15170 _MM_FROUND_CUR_DIRECTION); 15171 } 15172 15173 extern __inline __mmask8 15174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15175 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P) 15176 { 15177 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 15178 (__v2df) __Y, __P, 15179 (__mmask8) __M, 15180 _MM_FROUND_CUR_DIRECTION); 15181 } 15182 15183 extern __inline __mmask8 15184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15185 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P) 15186 { 15187 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 15188 (__v4sf) __Y, __P, 15189 (__mmask8) -1, 15190 _MM_FROUND_CUR_DIRECTION); 15191 } 15192 15193 extern __inline __mmask8 15194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15195 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) 15196 { 15197 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 15198 (__v4sf) __Y, __P, 15199 (__mmask8) __M, 15200 _MM_FROUND_CUR_DIRECTION); 15201 } 15202 15203 #else 15204 #define _mm512_cmp_pd_mask(X, Y, P) \ 15205 ((__mmask8) __builtin_ia32_cmppd512_mask 
((__v8df)(__m512d)(X), \ 15206 (__v8df)(__m512d)(Y), (int)(P),\ 15207 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 15208 15209 #define _mm512_cmp_ps_mask(X, Y, P) \ 15210 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 15211 (__v16sf)(__m512)(Y), (int)(P),\ 15212 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION)) 15213 15214 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \ 15215 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 15216 (__v8df)(__m512d)(Y), (int)(P),\ 15217 (__mmask8)(M), _MM_FROUND_CUR_DIRECTION)) 15218 15219 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \ 15220 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 15221 (__v16sf)(__m512)(Y), (int)(P),\ 15222 (__mmask16)(M),_MM_FROUND_CUR_DIRECTION)) 15223 15224 #define _mm_cmp_sd_mask(X, Y, P) \ 15225 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 15226 (__v2df)(__m128d)(Y), (int)(P),\ 15227 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 15228 15229 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \ 15230 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 15231 (__v2df)(__m128d)(Y), (int)(P),\ 15232 M,_MM_FROUND_CUR_DIRECTION)) 15233 15234 #define _mm_cmp_ss_mask(X, Y, P) \ 15235 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 15236 (__v4sf)(__m128)(Y), (int)(P), \ 15237 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 15238 15239 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \ 15240 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 15241 (__v4sf)(__m128)(Y), (int)(P), \ 15242 M,_MM_FROUND_CUR_DIRECTION)) 15243 #endif 15244 15245 extern __inline __mmask8 15246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15247 _mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y) 15248 { 15249 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15250 (__v8df) __Y, _CMP_EQ_OQ, 15251 (__mmask8) -1, 15252 _MM_FROUND_CUR_DIRECTION); 15253 } 15254 15255 extern __inline __mmask8 15256 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 15257 _mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 15258 { 15259 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15260 (__v8df) __Y, _CMP_EQ_OQ, 15261 (__mmask8) __U, 15262 _MM_FROUND_CUR_DIRECTION); 15263 } 15264 15265 extern __inline __mmask8 15266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15267 _mm512_cmplt_pd_mask (__m512d __X, __m512d __Y) 15268 { 15269 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15270 (__v8df) __Y, _CMP_LT_OS, 15271 (__mmask8) -1, 15272 _MM_FROUND_CUR_DIRECTION); 15273 } 15274 15275 extern __inline __mmask8 15276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15277 _mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 15278 { 15279 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15280 (__v8df) __Y, _CMP_LT_OS, 15281 (__mmask8) __U, 15282 _MM_FROUND_CUR_DIRECTION); 15283 } 15284 15285 extern __inline __mmask8 15286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15287 _mm512_cmple_pd_mask (__m512d __X, __m512d __Y) 15288 { 15289 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15290 (__v8df) __Y, _CMP_LE_OS, 15291 (__mmask8) -1, 15292 _MM_FROUND_CUR_DIRECTION); 15293 } 15294 15295 extern __inline __mmask8 15296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15297 _mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 15298 { 15299 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15300 (__v8df) __Y, _CMP_LE_OS, 15301 (__mmask8) __U, 15302 _MM_FROUND_CUR_DIRECTION); 15303 } 15304 15305 extern __inline __mmask8 15306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15307 _mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y) 15308 { 15309 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15310 (__v8df) __Y, _CMP_UNORD_Q, 15311 (__mmask8) -1, 15312 _MM_FROUND_CUR_DIRECTION); 15313 } 15314 15315 extern 
__inline __mmask8 15316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15317 _mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 15318 { 15319 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15320 (__v8df) __Y, _CMP_UNORD_Q, 15321 (__mmask8) __U, 15322 _MM_FROUND_CUR_DIRECTION); 15323 } 15324 15325 extern __inline __mmask8 15326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15327 _mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y) 15328 { 15329 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15330 (__v8df) __Y, _CMP_NEQ_UQ, 15331 (__mmask8) -1, 15332 _MM_FROUND_CUR_DIRECTION); 15333 } 15334 15335 extern __inline __mmask8 15336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15337 _mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 15338 { 15339 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15340 (__v8df) __Y, _CMP_NEQ_UQ, 15341 (__mmask8) __U, 15342 _MM_FROUND_CUR_DIRECTION); 15343 } 15344 15345 extern __inline __mmask8 15346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15347 _mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y) 15348 { 15349 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15350 (__v8df) __Y, _CMP_NLT_US, 15351 (__mmask8) -1, 15352 _MM_FROUND_CUR_DIRECTION); 15353 } 15354 15355 extern __inline __mmask8 15356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15357 _mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 15358 { 15359 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15360 (__v8df) __Y, _CMP_NLT_US, 15361 (__mmask8) __U, 15362 _MM_FROUND_CUR_DIRECTION); 15363 } 15364 15365 extern __inline __mmask8 15366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15367 _mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y) 15368 { 15369 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15370 (__v8df) __Y, 
_CMP_NLE_US, 15371 (__mmask8) -1, 15372 _MM_FROUND_CUR_DIRECTION); 15373 } 15374 15375 extern __inline __mmask8 15376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15377 _mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 15378 { 15379 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15380 (__v8df) __Y, _CMP_NLE_US, 15381 (__mmask8) __U, 15382 _MM_FROUND_CUR_DIRECTION); 15383 } 15384 15385 extern __inline __mmask8 15386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15387 _mm512_cmpord_pd_mask (__m512d __X, __m512d __Y) 15388 { 15389 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15390 (__v8df) __Y, _CMP_ORD_Q, 15391 (__mmask8) -1, 15392 _MM_FROUND_CUR_DIRECTION); 15393 } 15394 15395 extern __inline __mmask8 15396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15397 _mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 15398 { 15399 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 15400 (__v8df) __Y, _CMP_ORD_Q, 15401 (__mmask8) __U, 15402 _MM_FROUND_CUR_DIRECTION); 15403 } 15404 15405 extern __inline __mmask16 15406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15407 _mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y) 15408 { 15409 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15410 (__v16sf) __Y, _CMP_EQ_OQ, 15411 (__mmask16) -1, 15412 _MM_FROUND_CUR_DIRECTION); 15413 } 15414 15415 extern __inline __mmask16 15416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15417 _mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 15418 { 15419 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15420 (__v16sf) __Y, _CMP_EQ_OQ, 15421 (__mmask16) __U, 15422 _MM_FROUND_CUR_DIRECTION); 15423 } 15424 15425 extern __inline __mmask16 15426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15427 _mm512_cmplt_ps_mask (__m512 __X, __m512 __Y) 15428 { 15429 
return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15430 (__v16sf) __Y, _CMP_LT_OS, 15431 (__mmask16) -1, 15432 _MM_FROUND_CUR_DIRECTION); 15433 } 15434 15435 extern __inline __mmask16 15436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15437 _mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 15438 { 15439 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15440 (__v16sf) __Y, _CMP_LT_OS, 15441 (__mmask16) __U, 15442 _MM_FROUND_CUR_DIRECTION); 15443 } 15444 15445 extern __inline __mmask16 15446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15447 _mm512_cmple_ps_mask (__m512 __X, __m512 __Y) 15448 { 15449 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15450 (__v16sf) __Y, _CMP_LE_OS, 15451 (__mmask16) -1, 15452 _MM_FROUND_CUR_DIRECTION); 15453 } 15454 15455 extern __inline __mmask16 15456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15457 _mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 15458 { 15459 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15460 (__v16sf) __Y, _CMP_LE_OS, 15461 (__mmask16) __U, 15462 _MM_FROUND_CUR_DIRECTION); 15463 } 15464 15465 extern __inline __mmask16 15466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15467 _mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y) 15468 { 15469 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15470 (__v16sf) __Y, _CMP_UNORD_Q, 15471 (__mmask16) -1, 15472 _MM_FROUND_CUR_DIRECTION); 15473 } 15474 15475 extern __inline __mmask16 15476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15477 _mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 15478 { 15479 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15480 (__v16sf) __Y, _CMP_UNORD_Q, 15481 (__mmask16) __U, 15482 _MM_FROUND_CUR_DIRECTION); 15483 } 15484 15485 extern __inline __mmask16 15486 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 15487 _mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y) 15488 { 15489 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15490 (__v16sf) __Y, _CMP_NEQ_UQ, 15491 (__mmask16) -1, 15492 _MM_FROUND_CUR_DIRECTION); 15493 } 15494 15495 extern __inline __mmask16 15496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15497 _mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 15498 { 15499 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15500 (__v16sf) __Y, _CMP_NEQ_UQ, 15501 (__mmask16) __U, 15502 _MM_FROUND_CUR_DIRECTION); 15503 } 15504 15505 extern __inline __mmask16 15506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15507 _mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y) 15508 { 15509 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15510 (__v16sf) __Y, _CMP_NLT_US, 15511 (__mmask16) -1, 15512 _MM_FROUND_CUR_DIRECTION); 15513 } 15514 15515 extern __inline __mmask16 15516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15517 _mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 15518 { 15519 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15520 (__v16sf) __Y, _CMP_NLT_US, 15521 (__mmask16) __U, 15522 _MM_FROUND_CUR_DIRECTION); 15523 } 15524 15525 extern __inline __mmask16 15526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15527 _mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y) 15528 { 15529 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15530 (__v16sf) __Y, _CMP_NLE_US, 15531 (__mmask16) -1, 15532 _MM_FROUND_CUR_DIRECTION); 15533 } 15534 15535 extern __inline __mmask16 15536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15537 _mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 15538 { 15539 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15540 (__v16sf) __Y, _CMP_NLE_US, 15541 (__mmask16) __U, 15542 
_MM_FROUND_CUR_DIRECTION); 15543 } 15544 15545 extern __inline __mmask16 15546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15547 _mm512_cmpord_ps_mask (__m512 __X, __m512 __Y) 15548 { 15549 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15550 (__v16sf) __Y, _CMP_ORD_Q, 15551 (__mmask16) -1, 15552 _MM_FROUND_CUR_DIRECTION); 15553 } 15554 15555 extern __inline __mmask16 15556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15557 _mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 15558 { 15559 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 15560 (__v16sf) __Y, _CMP_ORD_Q, 15561 (__mmask16) __U, 15562 _MM_FROUND_CUR_DIRECTION); 15563 } 15564 15565 extern __inline __mmask16 15566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15567 _mm512_kmov (__mmask16 __A) 15568 { 15569 return __builtin_ia32_kmovw (__A); 15570 } 15571 15572 extern __inline __m512 15573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15574 _mm512_castpd_ps (__m512d __A) 15575 { 15576 return (__m512) (__A); 15577 } 15578 15579 extern __inline __m512i 15580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15581 _mm512_castpd_si512 (__m512d __A) 15582 { 15583 return (__m512i) (__A); 15584 } 15585 15586 extern __inline __m512d 15587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15588 _mm512_castps_pd (__m512 __A) 15589 { 15590 return (__m512d) (__A); 15591 } 15592 15593 extern __inline __m512i 15594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15595 _mm512_castps_si512 (__m512 __A) 15596 { 15597 return (__m512i) (__A); 15598 } 15599 15600 extern __inline __m512 15601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15602 _mm512_castsi512_ps (__m512i __A) 15603 { 15604 return (__m512) (__A); 15605 } 15606 15607 extern __inline __m512d 15608 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 15609 _mm512_castsi512_pd (__m512i __A) 15610 { 15611 return (__m512d) (__A); 15612 } 15613 15614 extern __inline __m128d 15615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15616 _mm512_castpd512_pd128 (__m512d __A) 15617 { 15618 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0); 15619 } 15620 15621 extern __inline __m128 15622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15623 _mm512_castps512_ps128 (__m512 __A) 15624 { 15625 return _mm512_extractf32x4_ps(__A, 0); 15626 } 15627 15628 extern __inline __m128i 15629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15630 _mm512_castsi512_si128 (__m512i __A) 15631 { 15632 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0); 15633 } 15634 15635 extern __inline __m256d 15636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15637 _mm512_castpd512_pd256 (__m512d __A) 15638 { 15639 return _mm512_extractf64x4_pd(__A, 0); 15640 } 15641 15642 extern __inline __m256 15643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15644 _mm512_castps512_ps256 (__m512 __A) 15645 { 15646 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0); 15647 } 15648 15649 extern __inline __m256i 15650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15651 _mm512_castsi512_si256 (__m512i __A) 15652 { 15653 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0); 15654 } 15655 15656 extern __inline __m512d 15657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15658 _mm512_castpd128_pd512 (__m128d __A) 15659 { 15660 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A); 15661 } 15662 15663 extern __inline __m512 15664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15665 _mm512_castps128_ps512 (__m128 __A) 15666 { 15667 return (__m512) __builtin_ia32_ps512_ps((__m128)__A); 15668 } 15669 15670 extern __inline __m512i 15671 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 15672 _mm512_castsi128_si512 (__m128i __A) 15673 { 15674 return (__m512i) __builtin_ia32_si512_si((__v4si)__A); 15675 } 15676 15677 extern __inline __m512d 15678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15679 _mm512_castpd256_pd512 (__m256d __A) 15680 { 15681 return __builtin_ia32_pd512_256pd (__A); 15682 } 15683 15684 extern __inline __m512 15685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15686 _mm512_castps256_ps512 (__m256 __A) 15687 { 15688 return __builtin_ia32_ps512_256ps (__A); 15689 } 15690 15691 extern __inline __m512i 15692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15693 _mm512_castsi256_si512 (__m256i __A) 15694 { 15695 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A); 15696 } 15697 15698 extern __inline __m512d 15699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15700 _mm512_zextpd128_pd512 (__m128d __A) 15701 { 15702 return (__m512d) _mm512_insertf32x4 (_mm512_setzero_ps (), (__m128) __A, 0); 15703 } 15704 15705 extern __inline __m512 15706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15707 _mm512_zextps128_ps512 (__m128 __A) 15708 { 15709 return _mm512_insertf32x4 (_mm512_setzero_ps (), __A, 0); 15710 } 15711 15712 extern __inline __m512i 15713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15714 _mm512_zextsi128_si512 (__m128i __A) 15715 { 15716 return _mm512_inserti32x4 (_mm512_setzero_si512 (), __A, 0); 15717 } 15718 15719 extern __inline __m512d 15720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15721 _mm512_zextpd256_pd512 (__m256d __A) 15722 { 15723 return _mm512_insertf64x4 (_mm512_setzero_pd (), __A, 0); 15724 } 15725 15726 extern __inline __m512 15727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15728 _mm512_zextps256_ps512 (__m256 __A) 15729 { 15730 return (__m512) _mm512_insertf64x4 (_mm512_setzero_pd (), 
(__m256d) __A, 0); 15731 } 15732 15733 extern __inline __m512i 15734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15735 _mm512_zextsi256_si512 (__m256i __A) 15736 { 15737 return _mm512_inserti64x4 (_mm512_setzero_si512 (), __A, 0); 15738 } 15739 15740 extern __inline __mmask16 15741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15742 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B) 15743 { 15744 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 15745 (__v16si) __B, 0, 15746 (__mmask16) -1); 15747 } 15748 15749 extern __inline __mmask16 15750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15751 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B) 15752 { 15753 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 15754 (__v16si) __B, 0, __U); 15755 } 15756 15757 extern __inline __mmask8 15758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15759 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B) 15760 { 15761 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 15762 (__v8di) __B, 0, __U); 15763 } 15764 15765 extern __inline __mmask8 15766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15767 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B) 15768 { 15769 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 15770 (__v8di) __B, 0, 15771 (__mmask8) -1); 15772 } 15773 15774 extern __inline __mmask16 15775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15776 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B) 15777 { 15778 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 15779 (__v16si) __B, 6, 15780 (__mmask16) -1); 15781 } 15782 15783 extern __inline __mmask16 15784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15785 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B) 15786 { 15787 return (__mmask16) 
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
						  (__v8di) __B, 6, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
						  (__v8di) __B, 6,
						  (__mmask8) -1);
}

/* Horizontal reductions.  Each __MM512_REDUCE_OP expansion halves the
   vector repeatedly (512 -> 256 -> 128 -> shuffles) applying `op'
   pairwise, and expects a vector argument named __A in scope.  The
   masked variants first neutralize masked-off lanes with the identity
   element of the operation (0 for +/|, 1 for *, ~0 for &, extrema for
   min/max) before reducing.  */

/* Reduction by an infix operator on 16 x 32-bit lanes.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1);		\
  __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0);		\
  __m256i __T3 = (__m256i) (__T1 op __T2);				\
  __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1);		\
  __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0);		\
  __v4si __T6 = __T4 op __T5;						\
  __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 });	\
  __v4si __T8 = __T6 op __T7;						\
  return __T8[0] op __T8[1]

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_and_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (&);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_or_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (|);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
  __MM512_REDUCE_OP (*);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
  __MM512_REDUCE_OP (&);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (|);
}

/* Reduction by a named min/max intrinsic on 16 x 32-bit lanes.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1);		\
  __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0);		\
  __m256i __T3 = _mm256_##op (__T1, __T2);				\
  __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1);		\
  __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0);		\
  __m128i __T6 = _mm_##op (__T4, __T5);					\
  __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6,		\
					      (__v4si) { 2, 3, 0, 1 }); \
  __m128i __T8 = _mm_##op (__T6, __T7);					\
  __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8,		\
					      (__v4si) { 1, 0, 1, 0 }); \
  __v4si __T10 = (__v4si) _mm_##op (__T8, __T9);			\
  return __T10[0]

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epi32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epi32 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epi32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_epu32 (__m512i __A)
{
  __MM512_REDUCE_OP (min_epu32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_epu32 (__m512i __A)
{
  __MM512_REDUCE_OP (max_epu32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
  __MM512_REDUCE_OP (min_epi32);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
  __MM512_REDUCE_OP (max_epi32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
  __MM512_REDUCE_OP (min_epu32);
}

extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi32 (__U, __A);
  __MM512_REDUCE_OP (max_epu32);
}

/* Reduction by an infix operator on 16 x float lanes.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1);	\
  __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0);	\
  __m256 __T3 = __T1 op __T2;						\
  __m128 __T4 = _mm256_extractf128_ps (__T3, 1);			\
  __m128 __T5 = _mm256_extractf128_ps (__T3, 0);			\
  __m128 __T6 = __T4 op __T5;						\
  __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 });	\
  __m128 __T8 = __T6 op __T7;						\
  return __T8[0] op __T8[1]

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_ps (__m512 __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_ps (__m512 __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_maskz_mov_ps (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
  __MM512_REDUCE_OP (*);
}

/* Reduction by a named min/max intrinsic on 16 x float lanes.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1);	\
  __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0);	\
  __m256 __T3 = _mm256_##op (__T1, __T2);				\
  __m128 __T4 = _mm256_extractf128_ps (__T3, 1);			\
  __m128 __T5 = _mm256_extractf128_ps (__T3, 0);			\
  __m128 __T6 = _mm_##op (__T4, __T5);					\
  __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 });	\
  __m128 __T8 = _mm_##op (__T6, __T7);					\
  __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 });	\
  __m128 __T10 = _mm_##op (__T8, __T9);					\
  return __T10[0]

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_min_ps (__m512 __A)
{
  __MM512_REDUCE_OP (min_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_max_ps (__m512 __A)
{
  __MM512_REDUCE_OP (max_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
  __MM512_REDUCE_OP (min_ps);
}

extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
{
  __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
  __MM512_REDUCE_OP (max_ps);
}

/* Reduction by an infix operator on 8 x 64-bit lanes.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1);		\
  __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0);		\
  __m256i __T3 = (__m256i) (__T1 op __T2);				\
  __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1);		\
  __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0);		\
  __v2di __T6 = __T4 op __T5;						\
  return __T6[0] op __T6[1]

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_add_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (+);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_mul_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (*);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_and_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (&);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_reduce_or_epi64 (__m512i __A)
{
  __MM512_REDUCE_OP (|);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (+);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
  __MM512_REDUCE_OP (*);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
  __MM512_REDUCE_OP (&);
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
{
  __A = _mm512_maskz_mov_epi64 (__U, __A);
  __MM512_REDUCE_OP (|);
}

/* Reduction by a named min/max intrinsic on 8 x 64-bit lanes, done
   entirely in 512-bit registers via lane shuffles.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e);			\
  __m512i __T2 = _mm512_##op (__A, __T1);				\
  __m512i __T3								\
    = (__m512i) __builtin_shuffle ((__v8di) __T2,			\
				   (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
  __m512i __T4 = _mm512_##op (__T2, __T3);				\
  __m512i __T5								\
    = (__m512i) __builtin_shuffle ((__v8di) __T4,			\
				   (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
  __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5);			\
return __T6[0] 16125 16126 extern __inline long long 16127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16128 _mm512_reduce_min_epi64 (__m512i __A) 16129 { 16130 __MM512_REDUCE_OP (min_epi64); 16131 } 16132 16133 extern __inline long long 16134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16135 _mm512_reduce_max_epi64 (__m512i __A) 16136 { 16137 __MM512_REDUCE_OP (max_epi64); 16138 } 16139 16140 extern __inline long long 16141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16142 _mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A) 16143 { 16144 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__), 16145 __U, __A); 16146 __MM512_REDUCE_OP (min_epi64); 16147 } 16148 16149 extern __inline long long 16150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16151 _mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A) 16152 { 16153 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1), 16154 __U, __A); 16155 __MM512_REDUCE_OP (max_epi64); 16156 } 16157 16158 extern __inline unsigned long long 16159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16160 _mm512_reduce_min_epu64 (__m512i __A) 16161 { 16162 __MM512_REDUCE_OP (min_epu64); 16163 } 16164 16165 extern __inline unsigned long long 16166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16167 _mm512_reduce_max_epu64 (__m512i __A) 16168 { 16169 __MM512_REDUCE_OP (max_epu64); 16170 } 16171 16172 extern __inline unsigned long long 16173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16174 _mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A) 16175 { 16176 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A); 16177 __MM512_REDUCE_OP (min_epu64); 16178 } 16179 16180 extern __inline unsigned long long 16181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16182 _mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i 
__A) 16183 { 16184 __A = _mm512_maskz_mov_epi64 (__U, __A); 16185 __MM512_REDUCE_OP (max_epu64); 16186 } 16187 16188 #undef __MM512_REDUCE_OP 16189 #define __MM512_REDUCE_OP(op) \ 16190 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \ 16191 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \ 16192 __m256d __T3 = __T1 op __T2; \ 16193 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \ 16194 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \ 16195 __m128d __T6 = __T4 op __T5; \ 16196 return __T6[0] op __T6[1] 16197 16198 extern __inline double 16199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16200 _mm512_reduce_add_pd (__m512d __A) 16201 { 16202 __MM512_REDUCE_OP (+); 16203 } 16204 16205 extern __inline double 16206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16207 _mm512_reduce_mul_pd (__m512d __A) 16208 { 16209 __MM512_REDUCE_OP (*); 16210 } 16211 16212 extern __inline double 16213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16214 _mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A) 16215 { 16216 __A = _mm512_maskz_mov_pd (__U, __A); 16217 __MM512_REDUCE_OP (+); 16218 } 16219 16220 extern __inline double 16221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16222 _mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A) 16223 { 16224 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A); 16225 __MM512_REDUCE_OP (*); 16226 } 16227 16228 #undef __MM512_REDUCE_OP 16229 #define __MM512_REDUCE_OP(op) \ 16230 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \ 16231 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \ 16232 __m256d __T3 = _mm256_##op (__T1, __T2); \ 16233 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \ 16234 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \ 16235 __m128d __T6 = _mm_##op (__T4, __T5); \ 16236 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \ 16237 __m128d __T8 = _mm_##op (__T6, __T7); \ 
16238 return __T8[0] 16239 16240 extern __inline double 16241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16242 _mm512_reduce_min_pd (__m512d __A) 16243 { 16244 __MM512_REDUCE_OP (min_pd); 16245 } 16246 16247 extern __inline double 16248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16249 _mm512_reduce_max_pd (__m512d __A) 16250 { 16251 __MM512_REDUCE_OP (max_pd); 16252 } 16253 16254 extern __inline double 16255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16256 _mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A) 16257 { 16258 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A); 16259 __MM512_REDUCE_OP (min_pd); 16260 } 16261 16262 extern __inline double 16263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 16264 _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A) 16265 { 16266 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A); 16267 __MM512_REDUCE_OP (max_pd); 16268 } 16269 16270 #undef __MM512_REDUCE_OP 16271 16272 #ifdef __DISABLE_AVX512F__ 16273 #undef __DISABLE_AVX512F__ 16274 #pragma GCC pop_options 16275 #endif /* __DISABLE_AVX512F__ */ 16276 16277 #endif /* _AVX512FINTRIN_H_INCLUDED */ 16278