1 /* Copyright (C) 2014-2020 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _IMMINTRIN_H_INCLUDED 25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef _AVX512VLINTRIN_H_INCLUDED 29 #define _AVX512VLINTRIN_H_INCLUDED 30 31 #ifndef __AVX512VL__ 32 #pragma GCC push_options 33 #pragma GCC target("avx512vl") 34 #define __DISABLE_AVX512VL__ 35 #endif /* __AVX512VL__ */ 36 37 /* Internal data types for implementing the intrinsics. 
*/ 38 typedef unsigned int __mmask32; 39 40 extern __inline __m256d 41 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 42 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 43 { 44 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A, 45 (__v4df) __W, 46 (__mmask8) __U); 47 } 48 49 extern __inline __m256d 50 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 51 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 52 { 53 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A, 54 (__v4df) 55 _mm256_setzero_pd (), 56 (__mmask8) __U); 57 } 58 59 extern __inline __m128d 60 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 61 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 62 { 63 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A, 64 (__v2df) __W, 65 (__mmask8) __U); 66 } 67 68 extern __inline __m128d 69 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 70 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 71 { 72 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A, 73 (__v2df) 74 _mm_setzero_pd (), 75 (__mmask8) __U); 76 } 77 78 extern __inline __m256d 79 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 80 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 81 { 82 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 83 (__v4df) __W, 84 (__mmask8) __U); 85 } 86 87 extern __inline __m256d 88 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 89 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) 90 { 91 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 92 (__v4df) 93 _mm256_setzero_pd (), 94 (__mmask8) __U); 95 } 96 97 extern __inline __m128d 98 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 99 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 100 { 101 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 102 (__v2df) __W, 103 (__mmask8) 
__U); 104 } 105 106 extern __inline __m128d 107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 108 _mm_maskz_load_pd (__mmask8 __U, void const *__P) 109 { 110 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 111 (__v2df) 112 _mm_setzero_pd (), 113 (__mmask8) __U); 114 } 115 116 extern __inline void 117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 118 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 119 { 120 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 121 (__v4df) __A, 122 (__mmask8) __U); 123 } 124 125 extern __inline void 126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 127 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 128 { 129 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 130 (__v2df) __A, 131 (__mmask8) __U); 132 } 133 134 extern __inline __m256 135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 136 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 137 { 138 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A, 139 (__v8sf) __W, 140 (__mmask8) __U); 141 } 142 143 extern __inline __m256 144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 145 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 146 { 147 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A, 148 (__v8sf) 149 _mm256_setzero_ps (), 150 (__mmask8) __U); 151 } 152 153 extern __inline __m128 154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 155 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 156 { 157 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A, 158 (__v4sf) __W, 159 (__mmask8) __U); 160 } 161 162 extern __inline __m128 163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 164 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 165 { 166 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A, 167 (__v4sf) 168 _mm_setzero_ps (), 169 (__mmask8) __U); 170 } 171 172 extern 
__inline __m256 173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 174 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 175 { 176 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 177 (__v8sf) __W, 178 (__mmask8) __U); 179 } 180 181 extern __inline __m256 182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 183 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) 184 { 185 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 186 (__v8sf) 187 _mm256_setzero_ps (), 188 (__mmask8) __U); 189 } 190 191 extern __inline __m128 192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 193 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 194 { 195 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 196 (__v4sf) __W, 197 (__mmask8) __U); 198 } 199 200 extern __inline __m128 201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 202 _mm_maskz_load_ps (__mmask8 __U, void const *__P) 203 { 204 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 205 (__v4sf) 206 _mm_setzero_ps (), 207 (__mmask8) __U); 208 } 209 210 extern __inline void 211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 212 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 213 { 214 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 215 (__v8sf) __A, 216 (__mmask8) __U); 217 } 218 219 extern __inline void 220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 221 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 222 { 223 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 224 (__v4sf) __A, 225 (__mmask8) __U); 226 } 227 228 extern __inline __m256i 229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 230 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 231 { 232 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A, 233 (__v4di) __W, 234 (__mmask8) __U); 235 } 236 237 extern 
__inline __m256i 238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 239 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 240 { 241 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A, 242 (__v4di) 243 _mm256_setzero_si256 (), 244 (__mmask8) __U); 245 } 246 247 extern __inline __m128i 248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 249 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 250 { 251 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A, 252 (__v2di) __W, 253 (__mmask8) __U); 254 } 255 256 extern __inline __m128i 257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 258 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 259 { 260 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A, 261 (__v2di) 262 _mm_setzero_si128 (), 263 (__mmask8) __U); 264 } 265 266 extern __inline __m256i 267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 268 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 269 { 270 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 271 (__v4di) __W, 272 (__mmask8) 273 __U); 274 } 275 276 extern __inline __m256i 277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 278 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 279 { 280 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 281 (__v4di) 282 _mm256_setzero_si256 (), 283 (__mmask8) 284 __U); 285 } 286 287 extern __inline __m128i 288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 289 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 290 { 291 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 292 (__v2di) __W, 293 (__mmask8) 294 __U); 295 } 296 297 extern __inline __m128i 298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 299 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 300 { 301 return (__m128i) 
__builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 302 (__v2di) 303 _mm_setzero_si128 (), 304 (__mmask8) 305 __U); 306 } 307 308 extern __inline void 309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 310 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 311 { 312 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 313 (__v4di) __A, 314 (__mmask8) __U); 315 } 316 317 extern __inline void 318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 319 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 320 { 321 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 322 (__v2di) __A, 323 (__mmask8) __U); 324 } 325 326 extern __inline __m256i 327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 328 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 329 { 330 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A, 331 (__v8si) __W, 332 (__mmask8) __U); 333 } 334 335 extern __inline __m256i 336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 337 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 338 { 339 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A, 340 (__v8si) 341 _mm256_setzero_si256 (), 342 (__mmask8) __U); 343 } 344 345 extern __inline __m128i 346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 347 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 348 { 349 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A, 350 (__v4si) __W, 351 (__mmask8) __U); 352 } 353 354 extern __inline __m128i 355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 356 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 357 { 358 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A, 359 (__v4si) 360 _mm_setzero_si128 (), 361 (__mmask8) __U); 362 } 363 364 extern __inline __m256i 365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 366 _mm256_mask_load_epi32 (__m256i __W, 
__mmask8 __U, void const *__P) 367 { 368 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 369 (__v8si) __W, 370 (__mmask8) 371 __U); 372 } 373 374 extern __inline __m256i 375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 376 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 377 { 378 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 379 (__v8si) 380 _mm256_setzero_si256 (), 381 (__mmask8) 382 __U); 383 } 384 385 extern __inline __m128i 386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 387 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 388 { 389 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 390 (__v4si) __W, 391 (__mmask8) 392 __U); 393 } 394 395 extern __inline __m128i 396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 397 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 398 { 399 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 400 (__v4si) 401 _mm_setzero_si128 (), 402 (__mmask8) 403 __U); 404 } 405 406 extern __inline void 407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 408 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 409 { 410 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 411 (__v8si) __A, 412 (__mmask8) __U); 413 } 414 415 extern __inline void 416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 417 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 418 { 419 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 420 (__v4si) __A, 421 (__mmask8) __U); 422 } 423 424 extern __inline __m128d 425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 426 _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 427 { 428 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A, 429 (__v2df) __B, 430 (__v2df) __W, 431 (__mmask8) __U); 432 } 433 434 extern __inline __m128d 435 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 436 _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B) 437 { 438 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A, 439 (__v2df) __B, 440 (__v2df) 441 _mm_setzero_pd (), 442 (__mmask8) __U); 443 } 444 445 extern __inline __m256d 446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 447 _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A, 448 __m256d __B) 449 { 450 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A, 451 (__v4df) __B, 452 (__v4df) __W, 453 (__mmask8) __U); 454 } 455 456 extern __inline __m256d 457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 458 _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B) 459 { 460 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A, 461 (__v4df) __B, 462 (__v4df) 463 _mm256_setzero_pd (), 464 (__mmask8) __U); 465 } 466 467 extern __inline __m128 468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 469 _mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 470 { 471 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A, 472 (__v4sf) __B, 473 (__v4sf) __W, 474 (__mmask8) __U); 475 } 476 477 extern __inline __m128 478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 479 _mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B) 480 { 481 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A, 482 (__v4sf) __B, 483 (__v4sf) 484 _mm_setzero_ps (), 485 (__mmask8) __U); 486 } 487 488 extern __inline __m256 489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 490 _mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 491 { 492 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A, 493 (__v8sf) __B, 494 (__v8sf) __W, 495 (__mmask8) __U); 496 } 497 498 extern __inline __m256 499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 500 _mm256_maskz_add_ps (__mmask8 __U, 
__m256 __A, __m256 __B) 501 { 502 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A, 503 (__v8sf) __B, 504 (__v8sf) 505 _mm256_setzero_ps (), 506 (__mmask8) __U); 507 } 508 509 extern __inline __m128d 510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 511 _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 512 { 513 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A, 514 (__v2df) __B, 515 (__v2df) __W, 516 (__mmask8) __U); 517 } 518 519 extern __inline __m128d 520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 521 _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B) 522 { 523 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A, 524 (__v2df) __B, 525 (__v2df) 526 _mm_setzero_pd (), 527 (__mmask8) __U); 528 } 529 530 extern __inline __m256d 531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 532 _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A, 533 __m256d __B) 534 { 535 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A, 536 (__v4df) __B, 537 (__v4df) __W, 538 (__mmask8) __U); 539 } 540 541 extern __inline __m256d 542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 543 _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B) 544 { 545 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A, 546 (__v4df) __B, 547 (__v4df) 548 _mm256_setzero_pd (), 549 (__mmask8) __U); 550 } 551 552 extern __inline __m128 553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 554 _mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 555 { 556 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A, 557 (__v4sf) __B, 558 (__v4sf) __W, 559 (__mmask8) __U); 560 } 561 562 extern __inline __m128 563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 564 _mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B) 565 { 566 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A, 567 
(__v4sf) __B, 568 (__v4sf) 569 _mm_setzero_ps (), 570 (__mmask8) __U); 571 } 572 573 extern __inline __m256 574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 575 _mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 576 { 577 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A, 578 (__v8sf) __B, 579 (__v8sf) __W, 580 (__mmask8) __U); 581 } 582 583 extern __inline __m256 584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 585 _mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B) 586 { 587 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A, 588 (__v8sf) __B, 589 (__v8sf) 590 _mm256_setzero_ps (), 591 (__mmask8) __U); 592 } 593 594 extern __inline void 595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 596 _mm256_store_epi64 (void *__P, __m256i __A) 597 { 598 *(__m256i *) __P = __A; 599 } 600 601 extern __inline void 602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 603 _mm_store_epi64 (void *__P, __m128i __A) 604 { 605 *(__m128i *) __P = __A; 606 } 607 608 extern __inline __m256d 609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 610 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 611 { 612 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P, 613 (__v4df) __W, 614 (__mmask8) __U); 615 } 616 617 extern __inline __m256d 618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 619 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 620 { 621 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P, 622 (__v4df) 623 _mm256_setzero_pd (), 624 (__mmask8) __U); 625 } 626 627 extern __inline __m128d 628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 629 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 630 { 631 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P, 632 (__v2df) __W, 633 (__mmask8) __U); 634 } 635 636 
extern __inline __m128d 637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 638 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 639 { 640 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P, 641 (__v2df) 642 _mm_setzero_pd (), 643 (__mmask8) __U); 644 } 645 646 extern __inline void 647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 648 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 649 { 650 __builtin_ia32_storeupd256_mask ((double *) __P, 651 (__v4df) __A, 652 (__mmask8) __U); 653 } 654 655 extern __inline void 656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 657 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 658 { 659 __builtin_ia32_storeupd128_mask ((double *) __P, 660 (__v2df) __A, 661 (__mmask8) __U); 662 } 663 664 extern __inline __m256 665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 666 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 667 { 668 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P, 669 (__v8sf) __W, 670 (__mmask8) __U); 671 } 672 673 extern __inline __m256 674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 675 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 676 { 677 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P, 678 (__v8sf) 679 _mm256_setzero_ps (), 680 (__mmask8) __U); 681 } 682 683 extern __inline __m128 684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 685 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 686 { 687 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P, 688 (__v4sf) __W, 689 (__mmask8) __U); 690 } 691 692 extern __inline __m128 693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 694 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 695 { 696 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P, 697 (__v4sf) 698 _mm_setzero_ps (), 
699 (__mmask8) __U); 700 } 701 702 extern __inline void 703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 704 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 705 { 706 __builtin_ia32_storeups256_mask ((float *) __P, 707 (__v8sf) __A, 708 (__mmask8) __U); 709 } 710 711 extern __inline void 712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 713 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 714 { 715 __builtin_ia32_storeups128_mask ((float *) __P, 716 (__v4sf) __A, 717 (__mmask8) __U); 718 } 719 720 extern __inline __m256i 721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 722 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 723 { 724 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P, 725 (__v4di) __W, 726 (__mmask8) __U); 727 } 728 729 extern __inline __m256i 730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 731 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 732 { 733 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P, 734 (__v4di) 735 _mm256_setzero_si256 (), 736 (__mmask8) __U); 737 } 738 739 extern __inline __m128i 740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 741 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 742 { 743 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P, 744 (__v2di) __W, 745 (__mmask8) __U); 746 } 747 748 extern __inline __m128i 749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 750 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 751 { 752 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P, 753 (__v2di) 754 _mm_setzero_si128 (), 755 (__mmask8) __U); 756 } 757 758 extern __inline void 759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 760 _mm256_storeu_epi64 (void *__P, __m256i __A) 761 { 762 *(__m256i_u *) __P = (__m256i_u) 
__A; 763 } 764 765 extern __inline void 766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 767 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 768 { 769 __builtin_ia32_storedqudi256_mask ((long long *) __P, 770 (__v4di) __A, 771 (__mmask8) __U); 772 } 773 774 extern __inline void 775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 776 _mm_storeu_epi64 (void *__P, __m128i __A) 777 { 778 *(__m128i_u *) __P = (__m128i_u) __A; 779 } 780 781 extern __inline void 782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 783 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 784 { 785 __builtin_ia32_storedqudi128_mask ((long long *) __P, 786 (__v2di) __A, 787 (__mmask8) __U); 788 } 789 790 extern __inline __m256i 791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 792 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 793 { 794 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P, 795 (__v8si) __W, 796 (__mmask8) __U); 797 } 798 799 extern __inline __m256i 800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 801 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 802 { 803 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P, 804 (__v8si) 805 _mm256_setzero_si256 (), 806 (__mmask8) __U); 807 } 808 809 extern __inline __m128i 810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 811 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 812 { 813 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P, 814 (__v4si) __W, 815 (__mmask8) __U); 816 } 817 818 extern __inline __m128i 819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 820 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 821 { 822 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P, 823 (__v4si) 824 _mm_setzero_si128 (), 825 (__mmask8) __U); 826 } 827 828 
extern __inline void 829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 830 _mm256_storeu_epi32 (void *__P, __m256i __A) 831 { 832 *(__m256i_u *) __P = (__m256i_u) __A; 833 } 834 835 extern __inline void 836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 837 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 838 { 839 __builtin_ia32_storedqusi256_mask ((int *) __P, 840 (__v8si) __A, 841 (__mmask8) __U); 842 } 843 844 extern __inline void 845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 846 _mm_storeu_epi32 (void *__P, __m128i __A) 847 { 848 *(__m128i_u *) __P = (__m128i_u) __A; 849 } 850 851 extern __inline void 852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 853 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 854 { 855 __builtin_ia32_storedqusi128_mask ((int *) __P, 856 (__v4si) __A, 857 (__mmask8) __U); 858 } 859 860 extern __inline __m256i 861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 862 _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 863 { 864 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A, 865 (__v8si) __W, 866 (__mmask8) __U); 867 } 868 869 extern __inline __m256i 870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 871 _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A) 872 { 873 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A, 874 (__v8si) 875 _mm256_setzero_si256 (), 876 (__mmask8) __U); 877 } 878 879 extern __inline __m128i 880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 881 _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 882 { 883 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A, 884 (__v4si) __W, 885 (__mmask8) __U); 886 } 887 888 extern __inline __m128i 889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 890 _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A) 891 { 892 return (__m128i) 
__builtin_ia32_pabsd128_mask ((__v4si) __A, 893 (__v4si) 894 _mm_setzero_si128 (), 895 (__mmask8) __U); 896 } 897 898 extern __inline __m256i 899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 900 _mm256_abs_epi64 (__m256i __A) 901 { 902 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 903 (__v4di) 904 _mm256_setzero_si256 (), 905 (__mmask8) -1); 906 } 907 908 extern __inline __m256i 909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 910 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 911 { 912 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 913 (__v4di) __W, 914 (__mmask8) __U); 915 } 916 917 extern __inline __m256i 918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 919 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) 920 { 921 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 922 (__v4di) 923 _mm256_setzero_si256 (), 924 (__mmask8) __U); 925 } 926 927 extern __inline __m128i 928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 929 _mm_abs_epi64 (__m128i __A) 930 { 931 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 932 (__v2di) 933 _mm_setzero_si128 (), 934 (__mmask8) -1); 935 } 936 937 extern __inline __m128i 938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 939 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 940 { 941 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 942 (__v2di) __W, 943 (__mmask8) __U); 944 } 945 946 extern __inline __m128i 947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 948 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) 949 { 950 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 951 (__v2di) 952 _mm_setzero_si128 (), 953 (__mmask8) __U); 954 } 955 956 extern __inline __m128i 957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 958 _mm256_cvtpd_epu32 (__m256d __A) 959 { 960 return (__m128i) 
__builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 961 (__v4si) 962 _mm_setzero_si128 (), 963 (__mmask8) -1); 964 } 965 966 extern __inline __m128i 967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 968 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) 969 { 970 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 971 (__v4si) __W, 972 (__mmask8) __U); 973 } 974 975 extern __inline __m128i 976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 977 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) 978 { 979 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 980 (__v4si) 981 _mm_setzero_si128 (), 982 (__mmask8) __U); 983 } 984 985 extern __inline __m128i 986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 987 _mm_cvtpd_epu32 (__m128d __A) 988 { 989 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 990 (__v4si) 991 _mm_setzero_si128 (), 992 (__mmask8) -1); 993 } 994 995 extern __inline __m128i 996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 997 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) 998 { 999 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 1000 (__v4si) __W, 1001 (__mmask8) __U); 1002 } 1003 1004 extern __inline __m128i 1005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1006 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) 1007 { 1008 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 1009 (__v4si) 1010 _mm_setzero_si128 (), 1011 (__mmask8) __U); 1012 } 1013 1014 extern __inline __m256i 1015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1016 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) 1017 { 1018 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 1019 (__v8si) __W, 1020 (__mmask8) __U); 1021 } 1022 1023 extern __inline __m256i 1024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
1025 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) 1026 { 1027 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 1028 (__v8si) 1029 _mm256_setzero_si256 (), 1030 (__mmask8) __U); 1031 } 1032 1033 extern __inline __m128i 1034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1035 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) 1036 { 1037 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 1038 (__v4si) __W, 1039 (__mmask8) __U); 1040 } 1041 1042 extern __inline __m128i 1043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1044 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) 1045 { 1046 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 1047 (__v4si) 1048 _mm_setzero_si128 (), 1049 (__mmask8) __U); 1050 } 1051 1052 extern __inline __m256i 1053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1054 _mm256_cvttps_epu32 (__m256 __A) 1055 { 1056 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 1057 (__v8si) 1058 _mm256_setzero_si256 (), 1059 (__mmask8) -1); 1060 } 1061 1062 extern __inline __m256i 1063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1064 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) 1065 { 1066 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 1067 (__v8si) __W, 1068 (__mmask8) __U); 1069 } 1070 1071 extern __inline __m256i 1072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1073 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) 1074 { 1075 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 1076 (__v8si) 1077 _mm256_setzero_si256 (), 1078 (__mmask8) __U); 1079 } 1080 1081 extern __inline __m128i 1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1083 _mm_cvttps_epu32 (__m128 __A) 1084 { 1085 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 1086 (__v4si) 1087 _mm_setzero_si128 (), 1088 
(__mmask8) -1); 1089 } 1090 /* Thin always-inline wrappers: each casts its vector arguments and forwards them to one __builtin_ia32_*_mask builtin as (source, second operand, mask). NOTE(review): the second operand presumably supplies the result elements whose mask bit is clear -- confirm in the Intel Intrinsics Guide. */ /* Masked: __builtin_ia32_cvttps2udq128_mask with the caller's __W and mask __U. */ 1091 extern __inline __m128i 1092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1093 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) 1094 { 1095 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 1096 (__v4si) __W, 1097 (__mmask8) __U); 1098 } 1099 /* Masked, zeroed second operand: __builtin_ia32_cvttps2udq128_mask with _mm_setzero_si128 () and mask __U. */ 1100 extern __inline __m128i 1101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1102 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) 1103 { 1104 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 1105 (__v4si) 1106 _mm_setzero_si128 (), 1107 (__mmask8) __U); 1108 } 1109 /* Masked: __builtin_ia32_cvttpd2dq256_mask on a __m256d source; result and __W are 128-bit (__v4si). */ 1110 extern __inline __m128i 1111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1112 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) 1113 { 1114 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 1115 (__v4si) __W, 1116 (__mmask8) __U); 1117 } 1118 /* Masked, zeroed second operand: __builtin_ia32_cvttpd2dq256_mask with _mm_setzero_si128 () and mask __U. */ 1119 extern __inline __m128i 1120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1121 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) 1122 { 1123 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 1124 (__v4si) 1125 _mm_setzero_si128 (), 1126 (__mmask8) __U); 1127 } 1128 /* Masked: __builtin_ia32_cvttpd2dq128_mask on a __m128d source with the caller's __W and mask __U. */ 1129 extern __inline __m128i 1130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1131 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) 1132 { 1133 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 1134 (__v4si) __W, 1135 (__mmask8) __U); 1136 } 1137 /* Masked, zeroed second operand: __builtin_ia32_cvttpd2dq128_mask with _mm_setzero_si128 () and mask __U. */ 1138 extern __inline __m128i 1139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1140 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) 1141 { 1142 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 1143 (__v4si) 1144 _mm_setzero_si128 (), 1145 (__mmask8) __U); 1146 } 1147 /* Unmasked: __builtin_ia32_cvttpd2udq256_mask with a zeroed second operand and mask (__mmask8) -1. */ 1148 extern __inline __m128i 1149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1150 _mm256_cvttpd_epu32 (__m256d
__A) 1151 { 1152 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 1153 (__v4si) 1154 _mm_setzero_si128 (), 1155 (__mmask8) -1); 1156 } 1157 /* Thin always-inline wrappers: each casts its vector arguments and forwards them to one __builtin_ia32_*_mask builtin as (source, second operand, mask). NOTE(review): the second operand presumably supplies the result elements whose mask bit is clear -- confirm in the Intel Intrinsics Guide. */ /* Masked: __builtin_ia32_cvttpd2udq256_mask with the caller's __W and mask __U. */ 1158 extern __inline __m128i 1159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1160 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) 1161 { 1162 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 1163 (__v4si) __W, 1164 (__mmask8) __U); 1165 } 1166 /* Masked, zeroed second operand: __builtin_ia32_cvttpd2udq256_mask with _mm_setzero_si128 () and mask __U. */ 1167 extern __inline __m128i 1168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1169 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) 1170 { 1171 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 1172 (__v4si) 1173 _mm_setzero_si128 (), 1174 (__mmask8) __U); 1175 } 1176 /* Unmasked: __builtin_ia32_cvttpd2udq128_mask on a __m128d source with a zeroed second operand and mask (__mmask8) -1. */ 1177 extern __inline __m128i 1178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1179 _mm_cvttpd_epu32 (__m128d __A) 1180 { 1181 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 1182 (__v4si) 1183 _mm_setzero_si128 (), 1184 (__mmask8) -1); 1185 } 1186 /* Masked: __builtin_ia32_cvttpd2udq128_mask with the caller's __W and mask __U. */ 1187 extern __inline __m128i 1188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1189 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) 1190 { 1191 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 1192 (__v4si) __W, 1193 (__mmask8) __U); 1194 } 1195 /* Masked, zeroed second operand: __builtin_ia32_cvttpd2udq128_mask with _mm_setzero_si128 () and mask __U. */ 1196 extern __inline __m128i 1197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1198 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) 1199 { 1200 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 1201 (__v4si) 1202 _mm_setzero_si128 (), 1203 (__mmask8) __U); 1204 } 1205 /* Masked: __builtin_ia32_cvtpd2dq256_mask on a __m256d source with the caller's __W and mask __U. */ 1206 extern __inline __m128i 1207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1208 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) 1209 { 1210 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 1211 (__v4si) __W, 1212 (__mmask8) __U); 1213 } 1214 /* Masked, zeroed second operand: __builtin_ia32_cvtpd2dq256_mask with _mm_setzero_si128 () and mask __U. */ 1215 extern __inline
__m128i 1216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1217 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) 1218 { 1219 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 1220 (__v4si) 1221 _mm_setzero_si128 (), 1222 (__mmask8) __U); 1223 } 1224 /* Thin always-inline wrappers: each casts its vector arguments and forwards them to one __builtin_ia32_*_mask builtin as (source, second operand, mask). NOTE(review): the second operand presumably supplies the result elements whose mask bit is clear -- confirm in the Intel Intrinsics Guide. */ /* Masked: __builtin_ia32_cvtpd2dq128_mask on a __m128d source with the caller's __W and mask __U. */ 1225 extern __inline __m128i 1226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1227 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) 1228 { 1229 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1230 (__v4si) __W, 1231 (__mmask8) __U); 1232 } 1233 /* Masked, zeroed second operand: __builtin_ia32_cvtpd2dq128_mask with _mm_setzero_si128 () and mask __U. */ 1234 extern __inline __m128i 1235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1236 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) 1237 { 1238 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1239 (__v4si) 1240 _mm_setzero_si128 (), 1241 (__mmask8) __U); 1242 } 1243 /* Masked: __builtin_ia32_cvtdq2pd256_mask on a __m128i source (as __v4si); result and __W are __m256d. */ 1244 extern __inline __m256d 1245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1246 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) 1247 { 1248 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A, 1249 (__v4df) __W, 1250 (__mmask8) __U); 1251 } 1252 /* Masked, zeroed second operand: __builtin_ia32_cvtdq2pd256_mask with _mm256_setzero_pd () and mask __U. */ 1253 extern __inline __m256d 1254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1255 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) 1256 { 1257 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A, 1258 (__v4df) 1259 _mm256_setzero_pd (), 1260 (__mmask8) __U); 1261 } 1262 /* Masked: __builtin_ia32_cvtdq2pd128_mask with the caller's __W (as __v2df) and mask __U. */ 1263 extern __inline __m128d 1264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1265 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) 1266 { 1267 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A, 1268 (__v2df) __W, 1269 (__mmask8) __U); 1270 } 1271 /* Masked, zeroed second operand: __builtin_ia32_cvtdq2pd128_mask with _mm_setzero_pd () and mask __U. */ 1272 extern __inline __m128d 1273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1274 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) 1275 { 1276 return (__m128d)
__builtin_ia32_cvtdq2pd128_mask ((__v4si) __A, 1277 (__v2df) 1278 _mm_setzero_pd (), 1279 (__mmask8) __U); 1280 } 1281 /* Thin always-inline wrappers: each casts its vector arguments and forwards them to one __builtin_ia32_*_mask builtin as (source, second operand, mask). NOTE(review): the second operand presumably supplies the result elements whose mask bit is clear -- confirm in the Intel Intrinsics Guide. */ /* Unmasked: __builtin_ia32_cvtudq2pd256_mask on a __m128i source with a zeroed __m256d second operand and mask (__mmask8) -1. */ 1282 extern __inline __m256d 1283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1284 _mm256_cvtepu32_pd (__m128i __A) 1285 { 1286 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A, 1287 (__v4df) 1288 _mm256_setzero_pd (), 1289 (__mmask8) -1); 1290 } 1291 /* Masked: __builtin_ia32_cvtudq2pd256_mask with the caller's __W and mask __U. */ 1292 extern __inline __m256d 1293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1294 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) 1295 { 1296 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A, 1297 (__v4df) __W, 1298 (__mmask8) __U); 1299 } 1300 /* Masked, zeroed second operand: __builtin_ia32_cvtudq2pd256_mask with _mm256_setzero_pd () and mask __U. */ 1301 extern __inline __m256d 1302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1303 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) 1304 { 1305 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A, 1306 (__v4df) 1307 _mm256_setzero_pd (), 1308 (__mmask8) __U); 1309 } 1310 /* Unmasked: __builtin_ia32_cvtudq2pd128_mask with a zeroed __m128d second operand and mask (__mmask8) -1. */ 1311 extern __inline __m128d 1312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1313 _mm_cvtepu32_pd (__m128i __A) 1314 { 1315 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A, 1316 (__v2df) 1317 _mm_setzero_pd (), 1318 (__mmask8) -1); 1319 } 1320 /* Masked: __builtin_ia32_cvtudq2pd128_mask with the caller's __W and mask __U. */ 1321 extern __inline __m128d 1322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1323 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) 1324 { 1325 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A, 1326 (__v2df) __W, 1327 (__mmask8) __U); 1328 } 1329 /* Masked, zeroed second operand: __builtin_ia32_cvtudq2pd128_mask with _mm_setzero_pd () and mask __U. */ 1330 extern __inline __m128d 1331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1332 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) 1333 { 1334 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A, 1335 (__v2df) 1336 _mm_setzero_pd (), 1337 (__mmask8) __U); 1338 } 1339 /* Masked: __builtin_ia32_cvtdq2ps256_mask on a __m256i source with the caller's __W (as __v8sf) and mask __U. */ 1340 extern __inline __m256 1341 __attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) 1342 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) 1343 { 1344 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 1345 (__v8sf) __W, 1346 (__mmask8) __U); 1347 } 1348 /* Thin always-inline wrappers: each casts its vector arguments and forwards them to one __builtin_ia32_*_mask builtin as (source, second operand, mask). NOTE(review): the second operand presumably supplies the result elements whose mask bit is clear -- confirm in the Intel Intrinsics Guide. */ /* Masked, zeroed second operand: __builtin_ia32_cvtdq2ps256_mask with _mm256_setzero_ps () and mask __U. */ 1349 extern __inline __m256 1350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1351 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) 1352 { 1353 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 1354 (__v8sf) 1355 _mm256_setzero_ps (), 1356 (__mmask8) __U); 1357 } 1358 /* Masked: __builtin_ia32_cvtdq2ps128_mask on a __m128i source with the caller's __W (as __v4sf) and mask __U. */ 1359 extern __inline __m128 1360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1361 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) 1362 { 1363 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 1364 (__v4sf) __W, 1365 (__mmask8) __U); 1366 } 1367 /* Masked, zeroed second operand: __builtin_ia32_cvtdq2ps128_mask with _mm_setzero_ps () and mask __U. */ 1368 extern __inline __m128 1369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1370 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) 1371 { 1372 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 1373 (__v4sf) 1374 _mm_setzero_ps (), 1375 (__mmask8) __U); 1376 } 1377 /* Unmasked: __builtin_ia32_cvtudq2ps256_mask with a zeroed second operand and mask (__mmask8) -1. */ 1378 extern __inline __m256 1379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1380 _mm256_cvtepu32_ps (__m256i __A) 1381 { 1382 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 1383 (__v8sf) 1384 _mm256_setzero_ps (), 1385 (__mmask8) -1); 1386 } 1387 /* Masked: __builtin_ia32_cvtudq2ps256_mask with the caller's __W and mask __U. */ 1388 extern __inline __m256 1389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1390 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) 1391 { 1392 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 1393 (__v8sf) __W, 1394 (__mmask8) __U); 1395 } 1396 /* Masked, zeroed second operand: __builtin_ia32_cvtudq2ps256_mask with _mm256_setzero_ps () and mask __U. */ 1397 extern __inline __m256 1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1399 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) 1400 { 1401 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 1402 (__v8sf) 1403 _mm256_setzero_ps (), 1404
(__mmask8) __U); 1405 } 1406 /* Thin always-inline wrappers: each casts its vector arguments and forwards them to one __builtin_ia32_*_mask builtin as (source, second operand, mask). NOTE(review): the second operand presumably supplies the result elements whose mask bit is clear -- confirm in the Intel Intrinsics Guide. */ /* Unmasked: __builtin_ia32_cvtudq2ps128_mask with a zeroed second operand and mask (__mmask8) -1. */ 1407 extern __inline __m128 1408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1409 _mm_cvtepu32_ps (__m128i __A) 1410 { 1411 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 1412 (__v4sf) 1413 _mm_setzero_ps (), 1414 (__mmask8) -1); 1415 } 1416 /* Masked: __builtin_ia32_cvtudq2ps128_mask with the caller's __W and mask __U. */ 1417 extern __inline __m128 1418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1419 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) 1420 { 1421 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 1422 (__v4sf) __W, 1423 (__mmask8) __U); 1424 } 1425 /* Masked, zeroed second operand: __builtin_ia32_cvtudq2ps128_mask with _mm_setzero_ps () and mask __U. */ 1426 extern __inline __m128 1427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1428 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) 1429 { 1430 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 1431 (__v4sf) 1432 _mm_setzero_ps (), 1433 (__mmask8) __U); 1434 } 1435 /* Masked: __builtin_ia32_cvtps2pd256_mask on a __m128 source; result and __W are __m256d. */ 1436 extern __inline __m256d 1437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1438 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) 1439 { 1440 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 1441 (__v4df) __W, 1442 (__mmask8) __U); 1443 } 1444 /* Masked, zeroed second operand: __builtin_ia32_cvtps2pd256_mask with _mm256_setzero_pd () and mask __U. */ 1445 extern __inline __m256d 1446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1447 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) 1448 { 1449 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 1450 (__v4df) 1451 _mm256_setzero_pd (), 1452 (__mmask8) __U); 1453 } 1454 /* Masked: __builtin_ia32_cvtps2pd128_mask with the caller's __W (as __v2df) and mask __U. */ 1455 extern __inline __m128d 1456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1457 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) 1458 { 1459 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 1460 (__v2df) __W, 1461 (__mmask8) __U); 1462 } 1463 /* Masked, zeroed second operand: __builtin_ia32_cvtps2pd128_mask with _mm_setzero_pd () and mask __U. */ 1464 extern __inline __m128d 1465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1466 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) 1467 { 1468 return (__m128d)
__builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 1469 (__v2df) 1470 _mm_setzero_pd (), 1471 (__mmask8) __U); 1472 } 1473 /* Thin always-inline wrappers around the __builtin_ia32_pmovdb* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Unmasked: __builtin_ia32_pmovdb128_mask with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1474 extern __inline __m128i 1475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1476 _mm_cvtepi32_epi8 (__m128i __A) 1477 { 1478 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 1479 (__v16qi) 1480 _mm_undefined_si128 (), 1481 (__mmask8) -1); 1482 } 1483 /* Store form: __builtin_ia32_pmovdb128mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 1484 extern __inline void 1485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1486 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 1487 { 1488 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 1489 } 1490 /* Masked: __builtin_ia32_pmovdb128_mask with the caller's __O as second operand and __M as mask. */ 1491 extern __inline __m128i 1492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1493 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 1494 { 1495 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 1496 (__v16qi) __O, __M); 1497 } 1498 /* Masked, zeroed second operand: __builtin_ia32_pmovdb128_mask with _mm_setzero_si128 () and mask __M. */ 1499 extern __inline __m128i 1500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1501 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 1502 { 1503 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 1504 (__v16qi) 1505 _mm_setzero_si128 (), 1506 __M); 1507 } 1508 /* Unmasked: __builtin_ia32_pmovdb256_mask on a __m256i source with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1509 extern __inline __m128i 1510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1511 _mm256_cvtepi32_epi8 (__m256i __A) 1512 { 1513 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 1514 (__v16qi) 1515 _mm_undefined_si128 (), 1516 (__mmask8) -1); 1517 } 1518 /* Masked: __builtin_ia32_pmovdb256_mask with the caller's __O and mask __M. */ 1519 extern __inline __m128i 1520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1521 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 1522 { 1523 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 1524 (__v16qi) __O, __M); 1525 } 1526 /* Store form: __builtin_ia32_pmovdb256mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 1527 extern __inline void 1528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1529 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8
__M, __m256i __A) 1530 { 1531 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 1532 } 1533 /* Thin always-inline wrappers around the __builtin_ia32_pmovdb* / pmovsdb* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Masked, zeroed second operand: __builtin_ia32_pmovdb256_mask with _mm_setzero_si128 () and mask __M. */ 1534 extern __inline __m128i 1535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1536 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 1537 { 1538 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 1539 (__v16qi) 1540 _mm_setzero_si128 (), 1541 __M); 1542 } 1543 /* Unmasked: __builtin_ia32_pmovsdb128_mask with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1544 extern __inline __m128i 1545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1546 _mm_cvtsepi32_epi8 (__m128i __A) 1547 { 1548 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 1549 (__v16qi) 1550 _mm_undefined_si128 (), 1551 (__mmask8) -1); 1552 } 1553 /* Store form: __builtin_ia32_pmovsdb128mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 1554 extern __inline void 1555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1556 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 1557 { 1558 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 1559 } 1560 /* Masked: __builtin_ia32_pmovsdb128_mask with the caller's __O and mask __M. */ 1561 extern __inline __m128i 1562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1563 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 1564 { 1565 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 1566 (__v16qi) __O, __M); 1567 } 1568 /* Masked, zeroed second operand: __builtin_ia32_pmovsdb128_mask with _mm_setzero_si128 () and mask __M. */ 1569 extern __inline __m128i 1570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1571 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 1572 { 1573 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 1574 (__v16qi) 1575 _mm_setzero_si128 (), 1576 __M); 1577 } 1578 /* Unmasked: __builtin_ia32_pmovsdb256_mask on a __m256i source with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1579 extern __inline __m128i 1580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1581 _mm256_cvtsepi32_epi8 (__m256i __A) 1582 { 1583 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 1584 (__v16qi) 1585 _mm_undefined_si128 (), 1586 (__mmask8) -1); 1587 } 1588 /* Store form: __builtin_ia32_pmovsdb256mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 1589 extern __inline void 1590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1591 _mm256_mask_cvtsepi32_storeu_epi8
(void * __P, __mmask8 __M, __m256i __A) 1592 { 1593 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 1594 } 1595 /* Thin always-inline wrappers around the __builtin_ia32_pmovsdb* / pmovusdb* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Masked: __builtin_ia32_pmovsdb256_mask with the caller's __O and mask __M. */ 1596 extern __inline __m128i 1597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1598 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 1599 { 1600 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 1601 (__v16qi) __O, __M); 1602 } 1603 /* Masked, zeroed second operand: __builtin_ia32_pmovsdb256_mask with _mm_setzero_si128 () and mask __M. */ 1604 extern __inline __m128i 1605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1606 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 1607 { 1608 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 1609 (__v16qi) 1610 _mm_setzero_si128 (), 1611 __M); 1612 } 1613 /* Unmasked: __builtin_ia32_pmovusdb128_mask with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1614 extern __inline __m128i 1615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1616 _mm_cvtusepi32_epi8 (__m128i __A) 1617 { 1618 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 1619 (__v16qi) 1620 _mm_undefined_si128 (), 1621 (__mmask8) -1); 1622 } 1623 /* Store form: __builtin_ia32_pmovusdb128mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 1624 extern __inline void 1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1626 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 1627 { 1628 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 1629 } 1630 /* Masked: __builtin_ia32_pmovusdb128_mask with the caller's __O and mask __M. */ 1631 extern __inline __m128i 1632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1633 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 1634 { 1635 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 1636 (__v16qi) __O, 1637 __M); 1638 } 1639 /* Masked, zeroed second operand: __builtin_ia32_pmovusdb128_mask with _mm_setzero_si128 () and mask __M. */ 1640 extern __inline __m128i 1641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1642 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 1643 { 1644 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 1645 (__v16qi) 1646 _mm_setzero_si128 (), 1647 __M); 1648 } 1649 /* Unmasked: __builtin_ia32_pmovusdb256_mask on a __m256i source with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1650 extern __inline __m128i 1651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1652
_mm256_cvtusepi32_epi8 (__m256i __A) 1653 { 1654 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 1655 (__v16qi) 1656 _mm_undefined_si128 (), 1657 (__mmask8) -1); 1658 } 1659 /* Thin always-inline wrappers around the __builtin_ia32_pmovusdb* / pmovdw* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Store form: __builtin_ia32_pmovusdb256mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 1660 extern __inline void 1661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1662 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 1663 { 1664 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 1665 } 1666 /* Masked: __builtin_ia32_pmovusdb256_mask with the caller's __O and mask __M. */ 1667 extern __inline __m128i 1668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1669 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 1670 { 1671 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 1672 (__v16qi) __O, 1673 __M); 1674 } 1675 /* Masked, zeroed second operand: __builtin_ia32_pmovusdb256_mask with _mm_setzero_si128 () and mask __M. */ 1676 extern __inline __m128i 1677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1678 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 1679 { 1680 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 1681 (__v16qi) 1682 _mm_setzero_si128 (), 1683 __M); 1684 } 1685 /* Unmasked: __builtin_ia32_pmovdw128_mask with mask (__mmask8) -1. NOTE(review): this passes _mm_setzero_si128 () as the second operand, whereas the unmasked 8-bit narrowing wrappers in this file (e.g. _mm256_cvtusepi32_epi8) pass _mm_undefined_si128 () -- confirm whether the difference is intentional. */ 1686 extern __inline __m128i 1687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1688 _mm_cvtepi32_epi16 (__m128i __A) 1689 { 1690 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 1691 (__v8hi) 1692 _mm_setzero_si128 (), 1693 (__mmask8) -1); 1694 } 1695 /* Store form: __builtin_ia32_pmovdw128mem_mask receives __P cast to __v8hi *, the source __A and mask __M. */ 1696 extern __inline void 1697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1698 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 1699 { 1700 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 1701 } 1702 /* Masked: __builtin_ia32_pmovdw128_mask with the caller's __O (as __v8hi) and mask __M. */ 1703 extern __inline __m128i 1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1705 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 1706 { 1707 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 1708 (__v8hi) __O, __M); 1709 } 1710 /* Masked, zeroed second operand: __builtin_ia32_pmovdw128_mask with _mm_setzero_si128 () and mask __M. */ 1711 extern __inline __m128i 1712 __attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) 1713 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 1714 { 1715 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 1716 (__v8hi) 1717 _mm_setzero_si128 (), 1718 __M); 1719 } 1720 /* Thin always-inline wrappers around the __builtin_ia32_pmovdw* / pmovsdw* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Unmasked: __builtin_ia32_pmovdw256_mask on a __m256i source with mask (__mmask8) -1. NOTE(review): passes _mm_setzero_si128 () as the second operand, whereas other unmasked narrowing wrappers in this file (e.g. _mm256_cvtsepi32_epi16) pass _mm_undefined_si128 () -- confirm whether the difference is intentional. */ 1721 extern __inline __m128i 1722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1723 _mm256_cvtepi32_epi16 (__m256i __A) 1724 { 1725 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 1726 (__v8hi) 1727 _mm_setzero_si128 (), 1728 (__mmask8) -1); 1729 } 1730 /* Store form: __builtin_ia32_pmovdw256mem_mask receives __P cast to __v8hi *, the source __A and mask __M. */ 1731 extern __inline void 1732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1733 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 1734 { 1735 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 1736 } 1737 /* Masked: __builtin_ia32_pmovdw256_mask with the caller's __O and mask __M. */ 1738 extern __inline __m128i 1739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1740 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 1741 { 1742 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 1743 (__v8hi) __O, __M); 1744 } 1745 /* Masked, zeroed second operand: __builtin_ia32_pmovdw256_mask with _mm_setzero_si128 () and mask __M. */ 1746 extern __inline __m128i 1747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1748 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 1749 { 1750 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 1751 (__v8hi) 1752 _mm_setzero_si128 (), 1753 __M); 1754 } 1755 /* Unmasked: __builtin_ia32_pmovsdw128_mask with mask (__mmask8) -1. NOTE(review): passes _mm_setzero_si128 () rather than the _mm_undefined_si128 () used by e.g. _mm256_cvtsepi32_epi16 -- confirm whether the difference is intentional. */ 1756 extern __inline __m128i 1757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1758 _mm_cvtsepi32_epi16 (__m128i __A) 1759 { 1760 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 1761 (__v8hi) 1762 _mm_setzero_si128 (), 1763 (__mmask8) -1); 1764 } 1765 /* Store form: __builtin_ia32_pmovsdw128mem_mask receives __P cast to __v8hi *, the source __A and mask __M. */ 1766 extern __inline void 1767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1768 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 1769 { 1770 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 1771 } 1772 /* Masked: __builtin_ia32_pmovsdw128_mask with the caller's __O and mask __M. */ 1773 extern __inline __m128i 1774 __attribute__ ((__gnu_inline__,
__always_inline__, __artificial__)) 1775 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 1776 { 1777 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 1778 (__v8hi)__O, 1779 __M); 1780 } 1781 /* Thin always-inline wrappers around the __builtin_ia32_pmovsdw* / pmovusdw* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Masked, zeroed second operand: __builtin_ia32_pmovsdw128_mask with _mm_setzero_si128 () and mask __M. */ 1782 extern __inline __m128i 1783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1784 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 1785 { 1786 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 1787 (__v8hi) 1788 _mm_setzero_si128 (), 1789 __M); 1790 } 1791 /* Unmasked: __builtin_ia32_pmovsdw256_mask on a __m256i source with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1792 extern __inline __m128i 1793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1794 _mm256_cvtsepi32_epi16 (__m256i __A) 1795 { 1796 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 1797 (__v8hi) 1798 _mm_undefined_si128 (), 1799 (__mmask8) -1); 1800 } 1801 /* Store form: __builtin_ia32_pmovsdw256mem_mask receives __P cast to __v8hi *, the source __A and mask __M. */ 1802 extern __inline void 1803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1804 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 1805 { 1806 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 1807 } 1808 /* Masked: __builtin_ia32_pmovsdw256_mask with the caller's __O and mask __M. */ 1809 extern __inline __m128i 1810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1811 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 1812 { 1813 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 1814 (__v8hi) __O, __M); 1815 } 1816 /* Masked, zeroed second operand: __builtin_ia32_pmovsdw256_mask with _mm_setzero_si128 () and mask __M. */ 1817 extern __inline __m128i 1818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1819 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 1820 { 1821 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 1822 (__v8hi) 1823 _mm_setzero_si128 (), 1824 __M); 1825 } 1826 /* Unmasked: __builtin_ia32_pmovusdw128_mask with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1827 extern __inline __m128i 1828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1829 _mm_cvtusepi32_epi16 (__m128i __A) 1830 { 1831 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 1832 (__v8hi) 1833 _mm_undefined_si128 (), 1834 (__mmask8) -1); 1835 } 1836 /* Store form: __builtin_ia32_pmovusdw128mem_mask receives __P cast to __v8hi *, the source __A and mask __M. */ 1837 extern __inline
void 1838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1839 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 1840 { 1841 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 1842 } 1843 /* Thin always-inline wrappers around the __builtin_ia32_pmovusdw* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Masked: __builtin_ia32_pmovusdw128_mask with the caller's __O and mask __M. */ 1844 extern __inline __m128i 1845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1846 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 1847 { 1848 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 1849 (__v8hi) __O, __M); 1850 } 1851 /* Masked, zeroed second operand: __builtin_ia32_pmovusdw128_mask with _mm_setzero_si128 () and mask __M. */ 1852 extern __inline __m128i 1853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1854 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 1855 { 1856 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 1857 (__v8hi) 1858 _mm_setzero_si128 (), 1859 __M); 1860 } 1861 /* Unmasked: __builtin_ia32_pmovusdw256_mask on a __m256i source with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1862 extern __inline __m128i 1863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1864 _mm256_cvtusepi32_epi16 (__m256i __A) 1865 { 1866 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 1867 (__v8hi) 1868 _mm_undefined_si128 (), 1869 (__mmask8) -1); 1870 } 1871 /* Store form: __builtin_ia32_pmovusdw256mem_mask receives __P cast to __v8hi *, the source __A and mask __M. */ 1872 extern __inline void 1873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1874 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 1875 { 1876 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 1877 } 1878 /* Masked: __builtin_ia32_pmovusdw256_mask with the caller's __O and mask __M. */ 1879 extern __inline __m128i 1880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1881 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 1882 { 1883 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 1884 (__v8hi) __O, __M); 1885 } 1886 /* Masked, zeroed second operand: __builtin_ia32_pmovusdw256_mask with _mm_setzero_si128 () and mask __M. */ 1887 extern __inline __m128i 1888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1889 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 1890 { 1891 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 1892 (__v8hi) 1893 _mm_setzero_si128 (), 1894 __M);
1895 } 1896 /* Thin always-inline wrappers around the __builtin_ia32_pmovqb* builtins (sources cast to __v2di / __v4di): value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Unmasked: __builtin_ia32_pmovqb128_mask with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1897 extern __inline __m128i 1898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1899 _mm_cvtepi64_epi8 (__m128i __A) 1900 { 1901 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 1902 (__v16qi) 1903 _mm_undefined_si128 (), 1904 (__mmask8) -1); 1905 } 1906 /* Store form: __builtin_ia32_pmovqb128mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 1907 extern __inline void 1908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1909 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 1910 { 1911 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 1912 } 1913 /* Masked: __builtin_ia32_pmovqb128_mask with the caller's __O and mask __M. */ 1914 extern __inline __m128i 1915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1916 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 1917 { 1918 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 1919 (__v16qi) __O, __M); 1920 } 1921 /* Masked, zeroed second operand: __builtin_ia32_pmovqb128_mask with _mm_setzero_si128 () and mask __M. */ 1922 extern __inline __m128i 1923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1924 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 1925 { 1926 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 1927 (__v16qi) 1928 _mm_setzero_si128 (), 1929 __M); 1930 } 1931 /* Unmasked: __builtin_ia32_pmovqb256_mask on a __m256i source with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1932 extern __inline __m128i 1933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1934 _mm256_cvtepi64_epi8 (__m256i __A) 1935 { 1936 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 1937 (__v16qi) 1938 _mm_undefined_si128 (), 1939 (__mmask8) -1); 1940 } 1941 /* Store form: __builtin_ia32_pmovqb256mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 1942 extern __inline void 1943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1944 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 1945 { 1946 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 1947 } 1948 /* Masked: __builtin_ia32_pmovqb256_mask with the caller's __O and mask __M. */ 1949 extern __inline __m128i 1950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1951 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 1952 { 1953 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 1954 (__v16qi) __O, __M); 1955 }
1956 /* Thin always-inline wrappers around the __builtin_ia32_pmovqb* / pmovsqb* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Masked, zeroed second operand: __builtin_ia32_pmovqb256_mask with _mm_setzero_si128 () and mask __M. */ 1957 extern __inline __m128i 1958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1959 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 1960 { 1961 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 1962 (__v16qi) 1963 _mm_setzero_si128 (), 1964 __M); 1965 } 1966 /* Unmasked: __builtin_ia32_pmovsqb128_mask with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 1967 extern __inline __m128i 1968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1969 _mm_cvtsepi64_epi8 (__m128i __A) 1970 { 1971 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 1972 (__v16qi) 1973 _mm_undefined_si128 (), 1974 (__mmask8) -1); 1975 } 1976 /* Store form: __builtin_ia32_pmovsqb128mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 1977 extern __inline void 1978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1979 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 1980 { 1981 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 1982 } 1983 /* Masked: __builtin_ia32_pmovsqb128_mask with the caller's __O and mask __M. */ 1984 extern __inline __m128i 1985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1986 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 1987 { 1988 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 1989 (__v16qi) __O, __M); 1990 } 1991 /* Masked, zeroed second operand: __builtin_ia32_pmovsqb128_mask with _mm_setzero_si128 () and mask __M. */ 1992 extern __inline __m128i 1993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1994 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 1995 { 1996 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 1997 (__v16qi) 1998 _mm_setzero_si128 (), 1999 __M); 2000 } 2001 /* Unmasked: __builtin_ia32_pmovsqb256_mask on a __m256i source with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 2002 extern __inline __m128i 2003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2004 _mm256_cvtsepi64_epi8 (__m256i __A) 2005 { 2006 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 2007 (__v16qi) 2008 _mm_undefined_si128 (), 2009 (__mmask8) -1); 2010 } 2011 /* Store form: __builtin_ia32_pmovsqb256mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 2012 extern __inline void 2013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2014 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 2015 { 2016 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di)
__A, __M); 2017 } 2018 /* Thin always-inline wrappers around the __builtin_ia32_pmovsqb* / pmovusqb* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Masked: __builtin_ia32_pmovsqb256_mask with the caller's __O and mask __M. */ 2019 extern __inline __m128i 2020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2021 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 2022 { 2023 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 2024 (__v16qi) __O, __M); 2025 } 2026 /* Masked, zeroed second operand: __builtin_ia32_pmovsqb256_mask with _mm_setzero_si128 () and mask __M. */ 2027 extern __inline __m128i 2028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2029 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 2030 { 2031 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 2032 (__v16qi) 2033 _mm_setzero_si128 (), 2034 __M); 2035 } 2036 /* Unmasked: __builtin_ia32_pmovusqb128_mask with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 2037 extern __inline __m128i 2038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2039 _mm_cvtusepi64_epi8 (__m128i __A) 2040 { 2041 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 2042 (__v16qi) 2043 _mm_undefined_si128 (), 2044 (__mmask8) -1); 2045 } 2046 /* Store form: __builtin_ia32_pmovusqb128mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 2047 extern __inline void 2048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2049 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 2050 { 2051 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 2052 } 2053 /* Masked: __builtin_ia32_pmovusqb128_mask with the caller's __O and mask __M. */ 2054 extern __inline __m128i 2055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2056 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 2057 { 2058 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 2059 (__v16qi) __O, 2060 __M); 2061 } 2062 /* Masked, zeroed second operand: __builtin_ia32_pmovusqb128_mask with _mm_setzero_si128 () and mask __M. */ 2063 extern __inline __m128i 2064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2065 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 2066 { 2067 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 2068 (__v16qi) 2069 _mm_setzero_si128 (), 2070 __M); 2071 } 2072 /* Unmasked: __builtin_ia32_pmovusqb256_mask on a __m256i source with an _mm_undefined_si128 () second operand and mask (__mmask8) -1. */ 2073 extern __inline __m128i 2074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2075 _mm256_cvtusepi64_epi8 (__m256i __A) 2076 { 2077 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 2078
(__v16qi) 2079 _mm_undefined_si128 (), 2080 (__mmask8) -1); 2081 } 2082 /* Thin always-inline wrappers around the __builtin_ia32_pmovusqb* / pmovqw* builtins: value-returning forms pass (source, second operand, mask); store forms pass (pointer, source, mask) and return void. NOTE(review): the second operand presumably supplies result elements whose mask bit is clear, and the store forms presumably write only mask-selected elements -- confirm in the Intel Intrinsics Guide. */ /* Store form: __builtin_ia32_pmovusqb256mem_mask receives __P cast to __v16qi *, the source __A and mask __M. */ 2083 extern __inline void 2084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2085 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 2086 { 2087 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 2088 } 2089 /* Masked: __builtin_ia32_pmovusqb256_mask with the caller's __O and mask __M. */ 2090 extern __inline __m128i 2091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2092 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 2093 { 2094 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 2095 (__v16qi) __O, 2096 __M); 2097 } 2098 /* Masked, zeroed second operand: __builtin_ia32_pmovusqb256_mask with _mm_setzero_si128 () and mask __M. */ 2099 extern __inline __m128i 2100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2101 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 2102 { 2103 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 2104 (__v16qi) 2105 _mm_setzero_si128 (), 2106 __M); 2107 } 2108 /* Unmasked: __builtin_ia32_pmovqw128_mask with an _mm_undefined_si128 () second operand (as __v8hi) and mask (__mmask8) -1. */ 2109 extern __inline __m128i 2110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2111 _mm_cvtepi64_epi16 (__m128i __A) 2112 { 2113 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 2114 (__v8hi) 2115 _mm_undefined_si128 (), 2116 (__mmask8) -1); 2117 } 2118 /* Store form: __builtin_ia32_pmovqw128mem_mask receives __P cast to __v8hi *, the source __A and mask __M. */ 2119 extern __inline void 2120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2121 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 2122 { 2123 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 2124 } 2125 /* Masked: __builtin_ia32_pmovqw128_mask with the caller's __O and mask __M. */ 2126 extern __inline __m128i 2127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2128 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 2129 { 2130 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 2131 (__v8hi)__O, 2132 __M); 2133 } 2134 /* Masked, zeroed second operand: __builtin_ia32_pmovqw128_mask with _mm_setzero_si128 () and mask __M. */ 2135 extern __inline __m128i 2136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2137 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 2138 { 2139 return (__m128i)
__builtin_ia32_pmovqw128_mask ((__v2di) __A, 2140 (__v8hi) 2141 _mm_setzero_si128 (), 2142 __M); 2143 } 2144 2145 extern __inline __m128i 2146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2147 _mm256_cvtepi64_epi16 (__m256i __A) 2148 { 2149 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 2150 (__v8hi) 2151 _mm_undefined_si128 (), 2152 (__mmask8) -1); 2153 } 2154 2155 extern __inline void 2156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2157 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 2158 { 2159 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 2160 } 2161 2162 extern __inline __m128i 2163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2164 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 2165 { 2166 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 2167 (__v8hi) __O, __M); 2168 } 2169 2170 extern __inline __m128i 2171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2172 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 2173 { 2174 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 2175 (__v8hi) 2176 _mm_setzero_si128 (), 2177 __M); 2178 } 2179 2180 extern __inline __m128i 2181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2182 _mm_cvtsepi64_epi16 (__m128i __A) 2183 { 2184 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 2185 (__v8hi) 2186 _mm_undefined_si128 (), 2187 (__mmask8) -1); 2188 } 2189 2190 extern __inline void 2191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2192 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 2193 { 2194 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 2195 } 2196 2197 extern __inline __m128i 2198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2199 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 
2200 { 2201 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 2202 (__v8hi) __O, __M); 2203 } 2204 2205 extern __inline __m128i 2206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2207 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 2208 { 2209 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 2210 (__v8hi) 2211 _mm_setzero_si128 (), 2212 __M); 2213 } 2214 2215 extern __inline __m128i 2216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2217 _mm256_cvtsepi64_epi16 (__m256i __A) 2218 { 2219 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 2220 (__v8hi) 2221 _mm_undefined_si128 (), 2222 (__mmask8) -1); 2223 } 2224 2225 extern __inline void 2226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2227 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 2228 { 2229 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 2230 } 2231 2232 extern __inline __m128i 2233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2234 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 2235 { 2236 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 2237 (__v8hi) __O, __M); 2238 } 2239 2240 extern __inline __m128i 2241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2242 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 2243 { 2244 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 2245 (__v8hi) 2246 _mm_setzero_si128 (), 2247 __M); 2248 } 2249 2250 extern __inline __m128i 2251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2252 _mm_cvtusepi64_epi16 (__m128i __A) 2253 { 2254 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 2255 (__v8hi) 2256 _mm_undefined_si128 (), 2257 (__mmask8) -1); 2258 } 2259 2260 extern __inline void 2261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2262 
_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 2263 { 2264 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 2265 } 2266 2267 extern __inline __m128i 2268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2269 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 2270 { 2271 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 2272 (__v8hi) __O, __M); 2273 } 2274 2275 extern __inline __m128i 2276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2277 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 2278 { 2279 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 2280 (__v8hi) 2281 _mm_setzero_si128 (), 2282 __M); 2283 } 2284 2285 extern __inline __m128i 2286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2287 _mm256_cvtusepi64_epi16 (__m256i __A) 2288 { 2289 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 2290 (__v8hi) 2291 _mm_undefined_si128 (), 2292 (__mmask8) -1); 2293 } 2294 2295 extern __inline void 2296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2297 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 2298 { 2299 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 2300 } 2301 2302 extern __inline __m128i 2303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2304 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 2305 { 2306 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 2307 (__v8hi) __O, __M); 2308 } 2309 2310 extern __inline __m128i 2311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2312 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 2313 { 2314 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 2315 (__v8hi) 2316 _mm_setzero_si128 (), 2317 __M); 2318 } 2319 2320 extern __inline __m128i 2321 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 2322 _mm_cvtepi64_epi32 (__m128i __A) 2323 { 2324 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 2325 (__v4si) 2326 _mm_undefined_si128 (), 2327 (__mmask8) -1); 2328 } 2329 2330 extern __inline void 2331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2332 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 2333 { 2334 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 2335 } 2336 2337 extern __inline __m128i 2338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2339 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 2340 { 2341 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 2342 (__v4si) __O, __M); 2343 } 2344 2345 extern __inline __m128i 2346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2347 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 2348 { 2349 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 2350 (__v4si) 2351 _mm_setzero_si128 (), 2352 __M); 2353 } 2354 2355 extern __inline __m128i 2356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2357 _mm256_cvtepi64_epi32 (__m256i __A) 2358 { 2359 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 2360 (__v4si) 2361 _mm_undefined_si128 (), 2362 (__mmask8) -1); 2363 } 2364 2365 extern __inline void 2366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2367 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 2368 { 2369 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 2370 } 2371 2372 extern __inline __m128i 2373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2374 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 2375 { 2376 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 2377 (__v4si) __O, __M); 2378 } 2379 2380 extern __inline __m128i 2381 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 2382 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 2383 { 2384 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 2385 (__v4si) 2386 _mm_setzero_si128 (), 2387 __M); 2388 } 2389 2390 extern __inline __m128i 2391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2392 _mm_cvtsepi64_epi32 (__m128i __A) 2393 { 2394 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 2395 (__v4si) 2396 _mm_undefined_si128 (), 2397 (__mmask8) -1); 2398 } 2399 2400 extern __inline void 2401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2402 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 2403 { 2404 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 2405 } 2406 2407 extern __inline __m128i 2408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2409 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 2410 { 2411 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 2412 (__v4si) __O, __M); 2413 } 2414 2415 extern __inline __m128i 2416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2417 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 2418 { 2419 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 2420 (__v4si) 2421 _mm_setzero_si128 (), 2422 __M); 2423 } 2424 2425 extern __inline __m128i 2426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2427 _mm256_cvtsepi64_epi32 (__m256i __A) 2428 { 2429 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 2430 (__v4si) 2431 _mm_undefined_si128 (), 2432 (__mmask8) -1); 2433 } 2434 2435 extern __inline void 2436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2437 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 2438 { 2439 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 2440 } 2441 2442 extern __inline __m128i 2443 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 2444 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 2445 { 2446 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 2447 (__v4si)__O, 2448 __M); 2449 } 2450 2451 extern __inline __m128i 2452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2453 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 2454 { 2455 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 2456 (__v4si) 2457 _mm_setzero_si128 (), 2458 __M); 2459 } 2460 2461 extern __inline __m128i 2462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2463 _mm_cvtusepi64_epi32 (__m128i __A) 2464 { 2465 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 2466 (__v4si) 2467 _mm_undefined_si128 (), 2468 (__mmask8) -1); 2469 } 2470 2471 extern __inline void 2472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2473 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 2474 { 2475 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 2476 } 2477 2478 extern __inline __m128i 2479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2480 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 2481 { 2482 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 2483 (__v4si) __O, __M); 2484 } 2485 2486 extern __inline __m128i 2487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2488 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 2489 { 2490 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 2491 (__v4si) 2492 _mm_setzero_si128 (), 2493 __M); 2494 } 2495 2496 extern __inline __m128i 2497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2498 _mm256_cvtusepi64_epi32 (__m256i __A) 2499 { 2500 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 2501 (__v4si) 2502 _mm_undefined_si128 (), 2503 (__mmask8) -1); 2504 } 2505 
2506 extern __inline void 2507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2508 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 2509 { 2510 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 2511 } 2512 2513 extern __inline __m128i 2514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2515 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 2516 { 2517 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 2518 (__v4si) __O, __M); 2519 } 2520 2521 extern __inline __m128i 2522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2523 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 2524 { 2525 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 2526 (__v4si) 2527 _mm_setzero_si128 (), 2528 __M); 2529 } 2530 2531 extern __inline __m256 2532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2533 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 2534 { 2535 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A, 2536 (__v8sf) __O, 2537 __M); 2538 } 2539 2540 extern __inline __m256 2541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2542 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 2543 { 2544 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A, 2545 (__v8sf) 2546 _mm256_setzero_ps (), 2547 __M); 2548 } 2549 2550 extern __inline __m128 2551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2552 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 2553 { 2554 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A, 2555 (__v4sf) __O, 2556 __M); 2557 } 2558 2559 extern __inline __m128 2560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2561 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 2562 { 2563 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A, 2564 
(__v4sf) 2565 _mm_setzero_ps (), 2566 __M); 2567 } 2568 2569 extern __inline __m256d 2570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2571 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 2572 { 2573 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A, 2574 (__v4df) __O, 2575 __M); 2576 } 2577 2578 extern __inline __m256d 2579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2580 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 2581 { 2582 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A, 2583 (__v4df) 2584 _mm256_setzero_pd (), 2585 __M); 2586 } 2587 2588 extern __inline __m256i 2589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2590 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 2591 { 2592 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A, 2593 (__v8si) __O, 2594 __M); 2595 } 2596 2597 extern __inline __m256i 2598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2599 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 2600 { 2601 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A, 2602 (__v8si) 2603 _mm256_setzero_si256 (), 2604 __M); 2605 } 2606 2607 extern __inline __m256i 2608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2609 _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A) 2610 { 2611 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O, 2612 __M); 2613 } 2614 2615 extern __inline __m256i 2616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2617 _mm256_maskz_set1_epi32 (__mmask8 __M, int __A) 2618 { 2619 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, 2620 (__v8si) 2621 _mm256_setzero_si256 (), 2622 __M); 2623 } 2624 2625 extern __inline __m128i 2626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2627 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 
__M, __m128i __A) 2628 { 2629 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A, 2630 (__v4si) __O, 2631 __M); 2632 } 2633 2634 extern __inline __m128i 2635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2636 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 2637 { 2638 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A, 2639 (__v4si) 2640 _mm_setzero_si128 (), 2641 __M); 2642 } 2643 2644 extern __inline __m128i 2645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2646 _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A) 2647 { 2648 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O, 2649 __M); 2650 } 2651 2652 extern __inline __m128i 2653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2654 _mm_maskz_set1_epi32 (__mmask8 __M, int __A) 2655 { 2656 return (__m128i) 2657 __builtin_ia32_pbroadcastd128_gpr_mask (__A, 2658 (__v4si) _mm_setzero_si128 (), 2659 __M); 2660 } 2661 2662 extern __inline __m256i 2663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2664 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 2665 { 2666 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A, 2667 (__v4di) __O, 2668 __M); 2669 } 2670 2671 extern __inline __m256i 2672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2673 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 2674 { 2675 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A, 2676 (__v4di) 2677 _mm256_setzero_si256 (), 2678 __M); 2679 } 2680 2681 extern __inline __m256i 2682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2683 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 2684 { 2685 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O, 2686 __M); 2687 } 2688 2689 extern __inline __m256i 2690 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 2691 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 2692 { 2693 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, 2694 (__v4di) 2695 _mm256_setzero_si256 (), 2696 __M); 2697 } 2698 2699 extern __inline __m128i 2700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2701 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 2702 { 2703 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A, 2704 (__v2di) __O, 2705 __M); 2706 } 2707 2708 extern __inline __m128i 2709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2710 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 2711 { 2712 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A, 2713 (__v2di) 2714 _mm_setzero_si128 (), 2715 __M); 2716 } 2717 2718 extern __inline __m128i 2719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2720 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 2721 { 2722 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O, 2723 __M); 2724 } 2725 2726 extern __inline __m128i 2727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2728 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 2729 { 2730 return (__m128i) 2731 __builtin_ia32_pbroadcastq128_gpr_mask (__A, 2732 (__v2di) _mm_setzero_si128 (), 2733 __M); 2734 } 2735 2736 extern __inline __m256 2737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2738 _mm256_broadcast_f32x4 (__m128 __A) 2739 { 2740 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, 2741 (__v8sf)_mm256_undefined_pd (), 2742 (__mmask8) -1); 2743 } 2744 2745 extern __inline __m256 2746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2747 _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A) 2748 { 2749 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, 2750 (__v8sf) __O, 2751 __M); 2752 } 2753 2754 extern 
__inline __m256 2755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2756 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 2757 { 2758 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, 2759 (__v8sf) 2760 _mm256_setzero_ps (), 2761 __M); 2762 } 2763 2764 extern __inline __m256i 2765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2766 _mm256_broadcast_i32x4 (__m128i __A) 2767 { 2768 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) 2769 __A, 2770 (__v8si)_mm256_undefined_si256 (), 2771 (__mmask8) -1); 2772 } 2773 2774 extern __inline __m256i 2775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2776 _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A) 2777 { 2778 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) 2779 __A, 2780 (__v8si) 2781 __O, __M); 2782 } 2783 2784 extern __inline __m256i 2785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2786 _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A) 2787 { 2788 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) 2789 __A, 2790 (__v8si) 2791 _mm256_setzero_si256 (), 2792 __M); 2793 } 2794 2795 extern __inline __m256i 2796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2797 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 2798 { 2799 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A, 2800 (__v8si) __W, 2801 (__mmask8) __U); 2802 } 2803 2804 extern __inline __m256i 2805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2806 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 2807 { 2808 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A, 2809 (__v8si) 2810 _mm256_setzero_si256 (), 2811 (__mmask8) __U); 2812 } 2813 2814 extern __inline __m128i 2815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2816 _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 
__U, __m128i __A) 2817 { 2818 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A, 2819 (__v4si) __W, 2820 (__mmask8) __U); 2821 } 2822 2823 extern __inline __m128i 2824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2825 _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 2826 { 2827 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A, 2828 (__v4si) 2829 _mm_setzero_si128 (), 2830 (__mmask8) __U); 2831 } 2832 2833 extern __inline __m256i 2834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2835 _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A) 2836 { 2837 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A, 2838 (__v4di) __W, 2839 (__mmask8) __U); 2840 } 2841 2842 extern __inline __m256i 2843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2844 _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) 2845 { 2846 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A, 2847 (__v4di) 2848 _mm256_setzero_si256 (), 2849 (__mmask8) __U); 2850 } 2851 2852 extern __inline __m128i 2853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2854 _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 2855 { 2856 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A, 2857 (__v2di) __W, 2858 (__mmask8) __U); 2859 } 2860 2861 extern __inline __m128i 2862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2863 _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) 2864 { 2865 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A, 2866 (__v2di) 2867 _mm_setzero_si128 (), 2868 (__mmask8) __U); 2869 } 2870 2871 extern __inline __m256i 2872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2873 _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 2874 { 2875 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A, 2876 (__v8si) __W, 2877 (__mmask8) __U); 2878 } 2879 
2880 extern __inline __m256i 2881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2882 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 2883 { 2884 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A, 2885 (__v8si) 2886 _mm256_setzero_si256 (), 2887 (__mmask8) __U); 2888 } 2889 2890 extern __inline __m128i 2891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2892 _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 2893 { 2894 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A, 2895 (__v4si) __W, 2896 (__mmask8) __U); 2897 } 2898 2899 extern __inline __m128i 2900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2901 _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 2902 { 2903 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A, 2904 (__v4si) 2905 _mm_setzero_si128 (), 2906 (__mmask8) __U); 2907 } 2908 2909 extern __inline __m256i 2910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2911 _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A) 2912 { 2913 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A, 2914 (__v4di) __W, 2915 (__mmask8) __U); 2916 } 2917 2918 extern __inline __m256i 2919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2920 _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) 2921 { 2922 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A, 2923 (__v4di) 2924 _mm256_setzero_si256 (), 2925 (__mmask8) __U); 2926 } 2927 2928 extern __inline __m128i 2929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2930 _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 2931 { 2932 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A, 2933 (__v2di) __W, 2934 (__mmask8) __U); 2935 } 2936 2937 extern __inline __m128i 2938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2939 _mm_maskz_cvtepi16_epi64 (__mmask8 __U, 
__m128i __A) 2940 { 2941 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A, 2942 (__v2di) 2943 _mm_setzero_si128 (), 2944 (__mmask8) __U); 2945 } 2946 2947 extern __inline __m256i 2948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2949 _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X) 2950 { 2951 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X, 2952 (__v4di) __W, 2953 (__mmask8) __U); 2954 } 2955 2956 extern __inline __m256i 2957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2958 _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X) 2959 { 2960 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X, 2961 (__v4di) 2962 _mm256_setzero_si256 (), 2963 (__mmask8) __U); 2964 } 2965 2966 extern __inline __m128i 2967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2968 _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X) 2969 { 2970 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X, 2971 (__v2di) __W, 2972 (__mmask8) __U); 2973 } 2974 2975 extern __inline __m128i 2976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2977 _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X) 2978 { 2979 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X, 2980 (__v2di) 2981 _mm_setzero_si128 (), 2982 (__mmask8) __U); 2983 } 2984 2985 extern __inline __m256i 2986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2987 _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 2988 { 2989 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A, 2990 (__v8si) __W, 2991 (__mmask8) __U); 2992 } 2993 2994 extern __inline __m256i 2995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2996 _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A) 2997 { 2998 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A, 2999 (__v8si) 3000 _mm256_setzero_si256 (), 3001 (__mmask8) 
__U); 3002 } 3003 3004 extern __inline __m128i 3005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3006 _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 3007 { 3008 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A, 3009 (__v4si) __W, 3010 (__mmask8) __U); 3011 } 3012 3013 extern __inline __m128i 3014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3015 _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A) 3016 { 3017 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A, 3018 (__v4si) 3019 _mm_setzero_si128 (), 3020 (__mmask8) __U); 3021 } 3022 3023 extern __inline __m256i 3024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3025 _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A) 3026 { 3027 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A, 3028 (__v4di) __W, 3029 (__mmask8) __U); 3030 } 3031 3032 extern __inline __m256i 3033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3034 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 3035 { 3036 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A, 3037 (__v4di) 3038 _mm256_setzero_si256 (), 3039 (__mmask8) __U); 3040 } 3041 3042 extern __inline __m128i 3043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3044 _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 3045 { 3046 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A, 3047 (__v2di) __W, 3048 (__mmask8) __U); 3049 } 3050 3051 extern __inline __m128i 3052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3053 _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 3054 { 3055 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A, 3056 (__v2di) 3057 _mm_setzero_si128 (), 3058 (__mmask8) __U); 3059 } 3060 3061 extern __inline __m256i 3062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3063 _mm256_mask_cvtepu16_epi32 
(__m256i __W, __mmask8 __U, __m128i __A) 3064 { 3065 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A, 3066 (__v8si) __W, 3067 (__mmask8) __U); 3068 } 3069 3070 extern __inline __m256i 3071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3072 _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A) 3073 { 3074 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A, 3075 (__v8si) 3076 _mm256_setzero_si256 (), 3077 (__mmask8) __U); 3078 } 3079 3080 extern __inline __m128i 3081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3082 _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 3083 { 3084 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A, 3085 (__v4si) __W, 3086 (__mmask8) __U); 3087 } 3088 3089 extern __inline __m128i 3090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3091 _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A) 3092 { 3093 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A, 3094 (__v4si) 3095 _mm_setzero_si128 (), 3096 (__mmask8) __U); 3097 } 3098 3099 extern __inline __m256i 3100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3101 _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A) 3102 { 3103 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A, 3104 (__v4di) __W, 3105 (__mmask8) __U); 3106 } 3107 3108 extern __inline __m256i 3109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3110 _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) 3111 { 3112 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A, 3113 (__v4di) 3114 _mm256_setzero_si256 (), 3115 (__mmask8) __U); 3116 } 3117 3118 extern __inline __m128i 3119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3120 _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 3121 { 3122 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A, 3123 (__v2di) __W, 3124 
(__mmask8) __U); 3125 } 3126 3127 extern __inline __m128i 3128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3129 _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) 3130 { 3131 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A, 3132 (__v2di) 3133 _mm_setzero_si128 (), 3134 (__mmask8) __U); 3135 } 3136 3137 extern __inline __m256i 3138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3139 _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X) 3140 { 3141 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X, 3142 (__v4di) __W, 3143 (__mmask8) __U); 3144 } 3145 3146 extern __inline __m256i 3147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3148 _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X) 3149 { 3150 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X, 3151 (__v4di) 3152 _mm256_setzero_si256 (), 3153 (__mmask8) __U); 3154 } 3155 3156 extern __inline __m128i 3157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3158 _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X) 3159 { 3160 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X, 3161 (__v2di) __W, 3162 (__mmask8) __U); 3163 } 3164 3165 extern __inline __m128i 3166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3167 _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X) 3168 { 3169 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X, 3170 (__v2di) 3171 _mm_setzero_si128 (), 3172 (__mmask8) __U); 3173 } 3174 3175 extern __inline __m256d 3176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3177 _mm256_rcp14_pd (__m256d __A) 3178 { 3179 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 3180 (__v4df) 3181 _mm256_setzero_pd (), 3182 (__mmask8) -1); 3183 } 3184 3185 extern __inline __m256d 3186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3187 _mm256_mask_rcp14_pd (__m256d 
__W, __mmask8 __U, __m256d __A) 3188 { 3189 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 3190 (__v4df) __W, 3191 (__mmask8) __U); 3192 } 3193 3194
/* _mm256_maskz_rcp14_pd: passes __A and __U to
   __builtin_ia32_rcp14pd256_mask, with _mm256_setzero_pd () as the
   second operand.  */
extern __inline __m256d 3195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3196 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 3197 { 3198 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 3199 (__v4df) 3200 _mm256_setzero_pd (), 3201 (__mmask8) __U); 3202 } 3203 3204
/* _mm_rcp14_pd: passes __A to __builtin_ia32_rcp14pd128_mask with
   _mm_setzero_pd () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m128d 3205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3206 _mm_rcp14_pd (__m128d __A) 3207 { 3208 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 3209 (__v2df) 3210 _mm_setzero_pd (), 3211 (__mmask8) -1); 3212 } 3213 3214
/* _mm_mask_rcp14_pd: passes __A, __W and __U to
   __builtin_ia32_rcp14pd128_mask.  */
extern __inline __m128d 3215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3216 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 3217 { 3218 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 3219 (__v2df) __W, 3220 (__mmask8) __U); 3221 } 3222 3223
/* _mm_maskz_rcp14_pd: passes __A and __U to
   __builtin_ia32_rcp14pd128_mask, with _mm_setzero_pd () as the second
   operand.  */
extern __inline __m128d 3224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3225 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 3226 { 3227 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 3228 (__v2df) 3229 _mm_setzero_pd (), 3230 (__mmask8) __U); 3231 } 3232 3233
/* _mm256_rcp14_ps: passes __A to __builtin_ia32_rcp14ps256_mask with
   _mm256_setzero_ps () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m256 3234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3235 _mm256_rcp14_ps (__m256 __A) 3236 { 3237 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 3238 (__v8sf) 3239 _mm256_setzero_ps (), 3240 (__mmask8) -1); 3241 } 3242 3243
/* _mm256_mask_rcp14_ps: passes __A, __W and __U to
   __builtin_ia32_rcp14ps256_mask.  */
extern __inline __m256 3244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3245 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 3246 { 3247 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 3248 (__v8sf) __W, 3249 (__mmask8) __U); 3250 } 3251 3252
/* _mm256_maskz_rcp14_ps: passes __A and __U to
   __builtin_ia32_rcp14ps256_mask, with _mm256_setzero_ps () as the
   second operand.  */
extern __inline __m256 3253 __attribute__ ((__gnu_inline__,
__always_inline__, __artificial__)) 3254 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 3255 { 3256 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 3257 (__v8sf) 3258 _mm256_setzero_ps (), 3259 (__mmask8) __U); 3260 } 3261 3262
/* _mm_rcp14_ps: passes __A to __builtin_ia32_rcp14ps128_mask with
   _mm_setzero_ps () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m128 3263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3264 _mm_rcp14_ps (__m128 __A) 3265 { 3266 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 3267 (__v4sf) 3268 _mm_setzero_ps (), 3269 (__mmask8) -1); 3270 } 3271 3272
/* _mm_mask_rcp14_ps: passes __A, __W and __U to
   __builtin_ia32_rcp14ps128_mask.  */
extern __inline __m128 3273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3274 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 3275 { 3276 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 3277 (__v4sf) __W, 3278 (__mmask8) __U); 3279 } 3280 3281
/* _mm_maskz_rcp14_ps: passes __A and __U to
   __builtin_ia32_rcp14ps128_mask, with _mm_setzero_ps () as the second
   operand.  */
extern __inline __m128 3282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3283 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 3284 { 3285 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 3286 (__v4sf) 3287 _mm_setzero_ps (), 3288 (__mmask8) __U); 3289 } 3290 3291
/* _mm256_rsqrt14_pd: passes __A to __builtin_ia32_rsqrt14pd256_mask
   with _mm256_setzero_pd () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m256d 3292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3293 _mm256_rsqrt14_pd (__m256d __A) 3294 { 3295 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 3296 (__v4df) 3297 _mm256_setzero_pd (), 3298 (__mmask8) -1); 3299 } 3300 3301
/* _mm256_mask_rsqrt14_pd: passes __A, __W and __U to
   __builtin_ia32_rsqrt14pd256_mask.  */
extern __inline __m256d 3302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3303 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 3304 { 3305 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 3306 (__v4df) __W, 3307 (__mmask8) __U); 3308 } 3309 3310
/* _mm256_maskz_rsqrt14_pd: passes __A and __U to
   __builtin_ia32_rsqrt14pd256_mask, with _mm256_setzero_pd () as the
   second operand.  */
extern __inline __m256d 3311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3312 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 3313 { 3314 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 3315 (__v4df) 3316 _mm256_setzero_pd (), 3317 (__mmask8) __U); 3318
} 3319 3320
/* _mm_rsqrt14_pd: passes __A to __builtin_ia32_rsqrt14pd128_mask with
   _mm_setzero_pd () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m128d 3321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3322 _mm_rsqrt14_pd (__m128d __A) 3323 { 3324 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 3325 (__v2df) 3326 _mm_setzero_pd (), 3327 (__mmask8) -1); 3328 } 3329 3330
/* _mm_mask_rsqrt14_pd: passes __A, __W and __U to
   __builtin_ia32_rsqrt14pd128_mask.  */
extern __inline __m128d 3331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3332 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 3333 { 3334 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 3335 (__v2df) __W, 3336 (__mmask8) __U); 3337 } 3338 3339
/* _mm_maskz_rsqrt14_pd: passes __A and __U to
   __builtin_ia32_rsqrt14pd128_mask, with _mm_setzero_pd () as the
   second operand.  */
extern __inline __m128d 3340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3341 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 3342 { 3343 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 3344 (__v2df) 3345 _mm_setzero_pd (), 3346 (__mmask8) __U); 3347 } 3348 3349
/* _mm256_rsqrt14_ps: passes __A to __builtin_ia32_rsqrt14ps256_mask
   with _mm256_setzero_ps () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m256 3350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3351 _mm256_rsqrt14_ps (__m256 __A) 3352 { 3353 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 3354 (__v8sf) 3355 _mm256_setzero_ps (), 3356 (__mmask8) -1); 3357 } 3358 3359
/* _mm256_mask_rsqrt14_ps: passes __A, __W and __U to
   __builtin_ia32_rsqrt14ps256_mask.  */
extern __inline __m256 3360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3361 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 3362 { 3363 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 3364 (__v8sf) __W, 3365 (__mmask8) __U); 3366 } 3367 3368
/* _mm256_maskz_rsqrt14_ps: passes __A and __U to
   __builtin_ia32_rsqrt14ps256_mask, with _mm256_setzero_ps () as the
   second operand.  */
extern __inline __m256 3369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3370 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 3371 { 3372 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 3373 (__v8sf) 3374 _mm256_setzero_ps (), 3375 (__mmask8) __U); 3376 } 3377 3378
/* _mm_rsqrt14_ps: passes __A to __builtin_ia32_rsqrt14ps128_mask with
   _mm_setzero_ps () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m128 3379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3380 _mm_rsqrt14_ps (__m128 __A) 3381 { 3382 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3383 (__v4sf) 3384 _mm_setzero_ps (), 3385 (__mmask8) -1); 3386 } 3387 3388
/* _mm_mask_rsqrt14_ps: passes __A, __W and __U to
   __builtin_ia32_rsqrt14ps128_mask.  */
extern __inline __m128 3389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3390 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 3391 { 3392 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 3393 (__v4sf) __W, 3394 (__mmask8) __U); 3395 } 3396 3397
/* _mm_maskz_rsqrt14_ps: passes __A and __U to
   __builtin_ia32_rsqrt14ps128_mask, with _mm_setzero_ps () as the
   second operand.  */
extern __inline __m128 3398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3399 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 3400 { 3401 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 3402 (__v4sf) 3403 _mm_setzero_ps (), 3404 (__mmask8) __U); 3405 } 3406 3407
/* _mm256_mask_sqrt_pd: passes __A, __W and __U to
   __builtin_ia32_sqrtpd256_mask.  */
extern __inline __m256d 3408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3409 _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A) 3410 { 3411 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A, 3412 (__v4df) __W, 3413 (__mmask8) __U); 3414 } 3415 3416
/* _mm256_maskz_sqrt_pd: passes __A and __U to
   __builtin_ia32_sqrtpd256_mask, with _mm256_setzero_pd () as the
   second operand.  */
extern __inline __m256d 3417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3418 _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A) 3419 { 3420 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A, 3421 (__v4df) 3422 _mm256_setzero_pd (), 3423 (__mmask8) __U); 3424 } 3425 3426
/* _mm_mask_sqrt_pd: passes __A, __W and __U to
   __builtin_ia32_sqrtpd128_mask.  */
extern __inline __m128d 3427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3428 _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A) 3429 { 3430 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A, 3431 (__v2df) __W, 3432 (__mmask8) __U); 3433 } 3434 3435
/* _mm_maskz_sqrt_pd: passes __A and __U to
   __builtin_ia32_sqrtpd128_mask, with _mm_setzero_pd () as the second
   operand.  */
extern __inline __m128d 3436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3437 _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A) 3438 { 3439 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A, 3440 (__v2df) 3441 _mm_setzero_pd (), 3442 (__mmask8) __U); 3443 } 3444 3445
/* _mm256_mask_sqrt_ps: passes __A, __W and __U to
   __builtin_ia32_sqrtps256_mask.  */
extern __inline __m256 3446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3447 _mm256_mask_sqrt_ps (__m256 __W,
__mmask8 __U, __m256 __A) 3448 { 3449 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A, 3450 (__v8sf) __W, 3451 (__mmask8) __U); 3452 } 3453 3454
/* _mm256_maskz_sqrt_ps: passes __A and __U to
   __builtin_ia32_sqrtps256_mask, with _mm256_setzero_ps () as the
   second operand.  */
extern __inline __m256 3455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3456 _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A) 3457 { 3458 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A, 3459 (__v8sf) 3460 _mm256_setzero_ps (), 3461 (__mmask8) __U); 3462 } 3463 3464
/* _mm_mask_sqrt_ps: passes __A, __W and __U to
   __builtin_ia32_sqrtps128_mask.  */
extern __inline __m128 3465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3466 _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A) 3467 { 3468 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A, 3469 (__v4sf) __W, 3470 (__mmask8) __U); 3471 } 3472 3473
/* _mm_maskz_sqrt_ps: passes __A and __U to
   __builtin_ia32_sqrtps128_mask, with _mm_setzero_ps () as the second
   operand.  */
extern __inline __m128 3474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3475 _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A) 3476 { 3477 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A, 3478 (__v4sf) 3479 _mm_setzero_ps (), 3480 (__mmask8) __U); 3481 } 3482 3483
/* _mm256_mask_add_epi32: passes __A, __B, __W and __U to
   __builtin_ia32_paddd256_mask.  */
extern __inline __m256i 3484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3485 _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 3486 __m256i __B) 3487 { 3488 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A, 3489 (__v8si) __B, 3490 (__v8si) __W, 3491 (__mmask8) __U); 3492 } 3493 3494
/* _mm256_maskz_add_epi32: passes __A, __B and __U to
   __builtin_ia32_paddd256_mask, with _mm256_setzero_si256 () as the
   third operand.  */
extern __inline __m256i 3495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3496 _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 3497 { 3498 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A, 3499 (__v8si) __B, 3500 (__v8si) 3501 _mm256_setzero_si256 (), 3502 (__mmask8) __U); 3503 } 3504 3505
/* _mm256_mask_add_epi64: passes __A, __B, __W and __U to
   __builtin_ia32_paddq256_mask.  */
extern __inline __m256i 3506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3507 _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 3508 __m256i __B) 3509 { 3510 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A, 3511 (__v4di) __B, 3512
(__v4di) __W, 3513 (__mmask8) __U); 3514 } 3515 3516
/* _mm256_maskz_add_epi64: passes __A, __B and __U to
   __builtin_ia32_paddq256_mask, with _mm256_setzero_si256 () as the
   third operand.  */
extern __inline __m256i 3517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3518 _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 3519 { 3520 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A, 3521 (__v4di) __B, 3522 (__v4di) 3523 _mm256_setzero_si256 (), 3524 (__mmask8) __U); 3525 } 3526 3527
/* _mm256_mask_sub_epi32: passes __A, __B, __W and __U to
   __builtin_ia32_psubd256_mask.  */
extern __inline __m256i 3528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3529 _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 3530 __m256i __B) 3531 { 3532 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A, 3533 (__v8si) __B, 3534 (__v8si) __W, 3535 (__mmask8) __U); 3536 } 3537 3538
/* _mm256_maskz_sub_epi32: passes __A, __B and __U to
   __builtin_ia32_psubd256_mask, with _mm256_setzero_si256 () as the
   third operand.  */
extern __inline __m256i 3539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3540 _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 3541 { 3542 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A, 3543 (__v8si) __B, 3544 (__v8si) 3545 _mm256_setzero_si256 (), 3546 (__mmask8) __U); 3547 } 3548 3549
/* _mm256_mask_sub_epi64: passes __A, __B, __W and __U to
   __builtin_ia32_psubq256_mask.  */
extern __inline __m256i 3550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3551 _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 3552 __m256i __B) 3553 { 3554 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A, 3555 (__v4di) __B, 3556 (__v4di) __W, 3557 (__mmask8) __U); 3558 } 3559 3560
/* _mm256_maskz_sub_epi64: passes __A, __B and __U to
   __builtin_ia32_psubq256_mask, with _mm256_setzero_si256 () as the
   third operand.  */
extern __inline __m256i 3561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3562 _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 3563 { 3564 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A, 3565 (__v4di) __B, 3566 (__v4di) 3567 _mm256_setzero_si256 (), 3568 (__mmask8) __U); 3569 } 3570 3571
/* _mm_mask_add_epi32: passes __A, __B, __W and __U to
   __builtin_ia32_paddd128_mask.  */
extern __inline __m128i 3572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3573 _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 3574 __m128i __B) 3575 { 3576 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A, 3577
(__v4si) __B, 3578 (__v4si) __W, 3579 (__mmask8) __U); 3580 } 3581 3582
/* _mm_maskz_add_epi32: passes __A, __B and __U to
   __builtin_ia32_paddd128_mask, with _mm_setzero_si128 () as the third
   operand.  */
extern __inline __m128i 3583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3584 _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 3585 { 3586 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A, 3587 (__v4si) __B, 3588 (__v4si) 3589 _mm_setzero_si128 (), 3590 (__mmask8) __U); 3591 } 3592 3593
/* _mm_mask_add_epi64: passes __A, __B, __W and __U to
   __builtin_ia32_paddq128_mask.  */
extern __inline __m128i 3594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3595 _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 3596 __m128i __B) 3597 { 3598 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A, 3599 (__v2di) __B, 3600 (__v2di) __W, 3601 (__mmask8) __U); 3602 } 3603 3604
/* _mm_maskz_add_epi64: passes __A, __B and __U to
   __builtin_ia32_paddq128_mask, with _mm_setzero_si128 () as the third
   operand.  */
extern __inline __m128i 3605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3606 _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 3607 { 3608 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A, 3609 (__v2di) __B, 3610 (__v2di) 3611 _mm_setzero_si128 (), 3612 (__mmask8) __U); 3613 } 3614 3615
/* _mm_mask_sub_epi32: passes __A, __B, __W and __U to
   __builtin_ia32_psubd128_mask.  */
extern __inline __m128i 3616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3617 _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 3618 __m128i __B) 3619 { 3620 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A, 3621 (__v4si) __B, 3622 (__v4si) __W, 3623 (__mmask8) __U); 3624 } 3625 3626
/* _mm_maskz_sub_epi32: passes __A, __B and __U to
   __builtin_ia32_psubd128_mask, with _mm_setzero_si128 () as the third
   operand.  */
extern __inline __m128i 3627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3628 _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 3629 { 3630 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A, 3631 (__v4si) __B, 3632 (__v4si) 3633 _mm_setzero_si128 (), 3634 (__mmask8) __U); 3635 } 3636 3637
/* _mm_mask_sub_epi64: passes __A, __B, __W and __U to
   __builtin_ia32_psubq128_mask.  */
extern __inline __m128i 3638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3639 _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 3640 __m128i __B) 3641 { 3642 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A, 3643
(__v2di) __B, 3644 (__v2di) __W, 3645 (__mmask8) __U); 3646 } 3647 3648
/* _mm_maskz_sub_epi64: passes __A, __B and __U to
   __builtin_ia32_psubq128_mask, with _mm_setzero_si128 () as the third
   operand.  */
extern __inline __m128i 3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3650 _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 3651 { 3652 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A, 3653 (__v2di) __B, 3654 (__v2di) 3655 _mm_setzero_si128 (), 3656 (__mmask8) __U); 3657 } 3658 3659
/* _mm256_getexp_ps: passes __A to __builtin_ia32_getexpps256_mask with
   _mm256_setzero_ps () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m256 3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3661 _mm256_getexp_ps (__m256 __A) 3662 { 3663 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3664 (__v8sf) 3665 _mm256_setzero_ps (), 3666 (__mmask8) -1); 3667 } 3668 3669
/* _mm256_mask_getexp_ps: passes __A, __W and __U to
   __builtin_ia32_getexpps256_mask.  */
extern __inline __m256 3670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3671 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) 3672 { 3673 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3674 (__v8sf) __W, 3675 (__mmask8) __U); 3676 } 3677 3678
/* _mm256_maskz_getexp_ps: passes __A and __U to
   __builtin_ia32_getexpps256_mask, with _mm256_setzero_ps () as the
   second operand.  */
extern __inline __m256 3679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3680 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) 3681 { 3682 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3683 (__v8sf) 3684 _mm256_setzero_ps (), 3685 (__mmask8) __U); 3686 } 3687 3688
/* _mm256_getexp_pd: passes __A to __builtin_ia32_getexppd256_mask with
   _mm256_setzero_pd () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m256d 3689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3690 _mm256_getexp_pd (__m256d __A) 3691 { 3692 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 3693 (__v4df) 3694 _mm256_setzero_pd (), 3695 (__mmask8) -1); 3696 } 3697 3698
/* _mm256_mask_getexp_pd: passes __A, __W and __U to
   __builtin_ia32_getexppd256_mask.  */
extern __inline __m256d 3699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3700 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) 3701 { 3702 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 3703 (__v4df) __W, 3704 (__mmask8) __U); 3705 } 3706 3707
/* _mm256_maskz_getexp_pd: passes __A and __U to
   __builtin_ia32_getexppd256_mask, with _mm256_setzero_pd () as the
   second operand.  */
extern __inline __m256d 3708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3709
_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) 3710 { 3711 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 3712 (__v4df) 3713 _mm256_setzero_pd (), 3714 (__mmask8) __U); 3715 } 3716 3717
/* _mm_getexp_ps: passes __A to __builtin_ia32_getexpps128_mask with
   _mm_setzero_ps () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m128 3718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3719 _mm_getexp_ps (__m128 __A) 3720 { 3721 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3722 (__v4sf) 3723 _mm_setzero_ps (), 3724 (__mmask8) -1); 3725 } 3726 3727
/* _mm_mask_getexp_ps: passes __A, __W and __U to
   __builtin_ia32_getexpps128_mask.  */
extern __inline __m128 3728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3729 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) 3730 { 3731 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3732 (__v4sf) __W, 3733 (__mmask8) __U); 3734 } 3735 3736
/* _mm_maskz_getexp_ps: passes __A and __U to
   __builtin_ia32_getexpps128_mask, with _mm_setzero_ps () as the second
   operand.  */
extern __inline __m128 3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3738 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) 3739 { 3740 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3741 (__v4sf) 3742 _mm_setzero_ps (), 3743 (__mmask8) __U); 3744 } 3745 3746
/* _mm_getexp_pd: passes __A to __builtin_ia32_getexppd128_mask with
   _mm_setzero_pd () as the second operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m128d 3747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3748 _mm_getexp_pd (__m128d __A) 3749 { 3750 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 3751 (__v2df) 3752 _mm_setzero_pd (), 3753 (__mmask8) -1); 3754 } 3755 3756
/* _mm_mask_getexp_pd: passes __A, __W and __U to
   __builtin_ia32_getexppd128_mask.  */
extern __inline __m128d 3757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3758 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) 3759 { 3760 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 3761 (__v2df) __W, 3762 (__mmask8) __U); 3763 } 3764 3765
/* _mm_maskz_getexp_pd: passes __A and __U to
   __builtin_ia32_getexppd128_mask, with _mm_setzero_pd () as the second
   operand.  */
extern __inline __m128d 3766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3767 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) 3768 { 3769 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 3770 (__v2df) 3771 _mm_setzero_pd (), 3772 (__mmask8) __U); 3773 } 3774 3775
/* _mm256_mask_srl_epi32: passes __A, the 128-bit operand __B, __W and
   __U to __builtin_ia32_psrld256_mask.  */
extern __inline __m256i 3776
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3777 _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 3778 __m128i __B) 3779 { 3780 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A, 3781 (__v4si) __B, 3782 (__v8si) __W, 3783 (__mmask8) __U); 3784 } 3785 3786
/* _mm256_maskz_srl_epi32: passes __A, the 128-bit operand __B and __U
   to __builtin_ia32_psrld256_mask, with _mm256_setzero_si256 () as the
   third operand.  */
extern __inline __m256i 3787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3788 _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B) 3789 { 3790 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A, 3791 (__v4si) __B, 3792 (__v8si) 3793 _mm256_setzero_si256 (), 3794 (__mmask8) __U); 3795 } 3796 3797
/* _mm_mask_srl_epi32: passes __A, __B, __W and __U to
   __builtin_ia32_psrld128_mask.  */
extern __inline __m128i 3798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3799 _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 3800 __m128i __B) 3801 { 3802 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A, 3803 (__v4si) __B, 3804 (__v4si) __W, 3805 (__mmask8) __U); 3806 } 3807 3808
/* _mm_maskz_srl_epi32: passes __A, __B and __U to
   __builtin_ia32_psrld128_mask, with _mm_setzero_si128 () as the third
   operand.  */
extern __inline __m128i 3809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3810 _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 3811 { 3812 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A, 3813 (__v4si) __B, 3814 (__v4si) 3815 _mm_setzero_si128 (), 3816 (__mmask8) __U); 3817 } 3818 3819
/* _mm256_mask_srl_epi64: passes __A, the 128-bit operand __B, __W and
   __U to __builtin_ia32_psrlq256_mask.  */
extern __inline __m256i 3820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3821 _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 3822 __m128i __B) 3823 { 3824 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A, 3825 (__v2di) __B, 3826 (__v4di) __W, 3827 (__mmask8) __U); 3828 } 3829 3830
/* _mm256_maskz_srl_epi64: passes __A, the 128-bit operand __B and __U
   to __builtin_ia32_psrlq256_mask, with _mm256_setzero_si256 () as the
   third operand.  */
extern __inline __m256i 3831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3832 _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B) 3833 { 3834 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A, 3835 (__v2di) __B, 3836 (__v4di) 3837 _mm256_setzero_si256 (), 3838 (__mmask8) __U); 3839 } 3840 3841
/* _mm_mask_srl_epi64: passes __A, __B, __W and __U to
   __builtin_ia32_psrlq128_mask.  */
extern __inline
__m128i 3842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3843 _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 3844 __m128i __B) 3845 { 3846 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A, 3847 (__v2di) __B, 3848 (__v2di) __W, 3849 (__mmask8) __U); 3850 } 3851 3852
/* _mm_maskz_srl_epi64: passes __A, __B and __U to
   __builtin_ia32_psrlq128_mask, with _mm_setzero_si128 () as the third
   operand.  */
extern __inline __m128i 3853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3854 _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 3855 { 3856 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A, 3857 (__v2di) __B, 3858 (__v2di) 3859 _mm_setzero_si128 (), 3860 (__mmask8) __U); 3861 } 3862 3863
/* _mm256_mask_and_epi32: passes __A, __B, __W and __U to
   __builtin_ia32_pandd256_mask.  */
extern __inline __m256i 3864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3865 _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 3866 __m256i __B) 3867 { 3868 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A, 3869 (__v8si) __B, 3870 (__v8si) __W, 3871 (__mmask8) __U); 3872 } 3873 3874
/* _mm256_maskz_and_epi32: passes __A, __B and __U to
   __builtin_ia32_pandd256_mask, with _mm256_setzero_si256 () as the
   third operand.  */
extern __inline __m256i 3875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3876 _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 3877 { 3878 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A, 3879 (__v8si) __B, 3880 (__v8si) 3881 _mm256_setzero_si256 (), 3882 (__mmask8) __U); 3883 } 3884 3885
/* _mm256_scalef_pd: passes __A and __B to
   __builtin_ia32_scalefpd256_mask with _mm256_setzero_pd () as the
   third operand and the constant mask (__mmask8) -1.  */
extern __inline __m256d 3886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3887 _mm256_scalef_pd (__m256d __A, __m256d __B) 3888 { 3889 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3890 (__v4df) __B, 3891 (__v4df) 3892 _mm256_setzero_pd (), 3893 (__mmask8) -1); 3894 } 3895 3896
/* _mm256_mask_scalef_pd: passes __A, __B, __W and __U to
   __builtin_ia32_scalefpd256_mask.  */
extern __inline __m256d 3897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3898 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3899 __m256d __B) 3900 { 3901 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3902 (__v4df) __B, 3903 (__v4df) __W, 3904 (__mmask8) __U); 3905 } 3906 3907
/* _mm256_maskz_scalef_pd: passes __A, __B and __U to
   __builtin_ia32_scalefpd256_mask, with _mm256_setzero_pd () as the
   third operand.  */
extern __inline
__m256d 3908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3909 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) 3910 { 3911 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3912 (__v4df) __B, 3913 (__v4df) 3914 _mm256_setzero_pd (), 3915 (__mmask8) __U); 3916 } 3917 3918
/* _mm256_scalef_ps: passes __A and __B to
   __builtin_ia32_scalefps256_mask with _mm256_setzero_ps () as the
   third operand and the constant mask (__mmask8) -1.  */
extern __inline __m256 3919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3920 _mm256_scalef_ps (__m256 __A, __m256 __B) 3921 { 3922 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3923 (__v8sf) __B, 3924 (__v8sf) 3925 _mm256_setzero_ps (), 3926 (__mmask8) -1); 3927 } 3928 3929
/* _mm256_mask_scalef_ps: passes __A, __B, __W and __U to
   __builtin_ia32_scalefps256_mask.  */
extern __inline __m256 3930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3931 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3932 __m256 __B) 3933 { 3934 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3935 (__v8sf) __B, 3936 (__v8sf) __W, 3937 (__mmask8) __U); 3938 } 3939 3940
/* _mm256_maskz_scalef_ps: passes __A, __B and __U to
   __builtin_ia32_scalefps256_mask, with _mm256_setzero_ps () as the
   third operand.  */
extern __inline __m256 3941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3942 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) 3943 { 3944 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3945 (__v8sf) __B, 3946 (__v8sf) 3947 _mm256_setzero_ps (), 3948 (__mmask8) __U); 3949 } 3950 3951
/* _mm_scalef_pd: passes __A and __B to __builtin_ia32_scalefpd128_mask
   with _mm_setzero_pd () as the third operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m128d 3952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3953 _mm_scalef_pd (__m128d __A, __m128d __B) 3954 { 3955 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3956 (__v2df) __B, 3957 (__v2df) 3958 _mm_setzero_pd (), 3959 (__mmask8) -1); 3960 } 3961 3962
/* _mm_mask_scalef_pd: passes __A, __B, __W and __U to
   __builtin_ia32_scalefpd128_mask.  */
extern __inline __m128d 3963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3964 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3965 __m128d __B) 3966 { 3967 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3968 (__v2df) __B, 3969 (__v2df) __W, 3970 (__mmask8) __U); 3971 } 3972 3973
/* _mm_maskz_scalef_pd: passes __A, __B and __U to
   __builtin_ia32_scalefpd128_mask, with _mm_setzero_pd () as the third
   operand.  */
extern __inline __m128d 3974 __attribute__
((__gnu_inline__, __always_inline__, __artificial__)) 3975 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) 3976 { 3977 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3978 (__v2df) __B, 3979 (__v2df) 3980 _mm_setzero_pd (), 3981 (__mmask8) __U); 3982 } 3983 3984
/* _mm_scalef_ps: passes __A and __B to __builtin_ia32_scalefps128_mask
   with _mm_setzero_ps () as the third operand and the constant mask
   (__mmask8) -1.  */
extern __inline __m128 3985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3986 _mm_scalef_ps (__m128 __A, __m128 __B) 3987 { 3988 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3989 (__v4sf) __B, 3990 (__v4sf) 3991 _mm_setzero_ps (), 3992 (__mmask8) -1); 3993 } 3994 3995
/* _mm_mask_scalef_ps: passes __A, __B, __W and __U to
   __builtin_ia32_scalefps128_mask.  */
extern __inline __m128 3996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3997 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 3998 { 3999 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 4000 (__v4sf) __B, 4001 (__v4sf) __W, 4002 (__mmask8) __U); 4003 } 4004 4005
/* _mm_maskz_scalef_ps: passes __A, __B and __U to
   __builtin_ia32_scalefps128_mask, with _mm_setzero_ps () as the third
   operand.  */
extern __inline __m128 4006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4007 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) 4008 { 4009 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 4010 (__v4sf) __B, 4011 (__v4sf) 4012 _mm_setzero_ps (), 4013 (__mmask8) __U); 4014 } 4015 4016
/* _mm256_mask_fmadd_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddpd256_mask.  */
extern __inline __m256d 4017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4018 _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B, 4019 __m256d __C) 4020 { 4021 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, 4022 (__v4df) __B, 4023 (__v4df) __C, 4024 (__mmask8) __U); 4025 } 4026 4027
/* _mm256_mask3_fmadd_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddpd256_mask3.  */
extern __inline __m256d 4028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4029 _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C, 4030 __mmask8 __U) 4031 { 4032 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A, 4033 (__v4df) __B, 4034 (__v4df) __C, 4035 (__mmask8) __U); 4036 } 4037 4038
/* _mm256_maskz_fmadd_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddpd256_maskz.  */
extern __inline __m256d 4039 __attribute__ ((__gnu_inline__,
__always_inline__, __artificial__)) 4040 _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B, 4041 __m256d __C) 4042 { 4043 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, 4044 (__v4df) __B, 4045 (__v4df) __C, 4046 (__mmask8) __U); 4047 } 4048 4049
/* _mm_mask_fmadd_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddpd128_mask.  */
extern __inline __m128d 4050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4051 _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 4052 { 4053 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, 4054 (__v2df) __B, 4055 (__v2df) __C, 4056 (__mmask8) __U); 4057 } 4058 4059
/* _mm_mask3_fmadd_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddpd128_mask3.  */
extern __inline __m128d 4060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4061 _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C, 4062 __mmask8 __U) 4063 { 4064 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A, 4065 (__v2df) __B, 4066 (__v2df) __C, 4067 (__mmask8) __U); 4068 } 4069 4070
/* _mm_maskz_fmadd_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddpd128_maskz.  */
extern __inline __m128d 4071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4072 _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B, 4073 __m128d __C) 4074 { 4075 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, 4076 (__v2df) __B, 4077 (__v2df) __C, 4078 (__mmask8) __U); 4079 } 4080 4081
/* _mm256_mask_fmadd_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddps256_mask.  */
extern __inline __m256 4082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4083 _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 4084 { 4085 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, 4086 (__v8sf) __B, 4087 (__v8sf) __C, 4088 (__mmask8) __U); 4089 } 4090 4091
/* _mm256_mask3_fmadd_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddps256_mask3.  */
extern __inline __m256 4092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4093 _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C, 4094 __mmask8 __U) 4095 { 4096 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A, 4097 (__v8sf) __B, 4098 (__v8sf) __C, 4099 (__mmask8) __U); 4100 } 4101 4102
/* _mm256_maskz_fmadd_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddps256_maskz.  */
extern __inline __m256 4103 __attribute__ ((__gnu_inline__,
__always_inline__, __artificial__)) 4104 _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B, 4105 __m256 __C) 4106 { 4107 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, 4108 (__v8sf) __B, 4109 (__v8sf) __C, 4110 (__mmask8) __U); 4111 } 4112 4113
/* _mm_mask_fmadd_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddps128_mask.  */
extern __inline __m128 4114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4115 _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 4116 { 4117 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, 4118 (__v4sf) __B, 4119 (__v4sf) __C, 4120 (__mmask8) __U); 4121 } 4122 4123
/* _mm_mask3_fmadd_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddps128_mask3.  */
extern __inline __m128 4124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4125 _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 4126 { 4127 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A, 4128 (__v4sf) __B, 4129 (__v4sf) __C, 4130 (__mmask8) __U); 4131 } 4132 4133
/* _mm_maskz_fmadd_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddps128_maskz.  */
extern __inline __m128 4134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4135 _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 4136 { 4137 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, 4138 (__v4sf) __B, 4139 (__v4sf) __C, 4140 (__mmask8) __U); 4141 } 4142 4143
/* _mm256_mask_fmsub_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubpd256_mask.  */
extern __inline __m256d 4144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4145 _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B, 4146 __m256d __C) 4147 { 4148 return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A, 4149 (__v4df) __B, 4150 (__v4df) __C, 4151 (__mmask8) __U); 4152 } 4153 4154
/* _mm256_mask3_fmsub_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubpd256_mask3.  */
extern __inline __m256d 4155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4156 _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C, 4157 __mmask8 __U) 4158 { 4159 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A, 4160 (__v4df) __B, 4161 (__v4df) __C, 4162 (__mmask8) __U); 4163 } 4164 4165
/* _mm256_maskz_fmsub_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubpd256_maskz.  */
extern __inline __m256d 4166 __attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) 4167 _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B, 4168 __m256d __C) 4169 { 4170 return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A, 4171 (__v4df) __B, 4172 (__v4df) __C, 4173 (__mmask8) __U); 4174 } 4175 4176
/* _mm_mask_fmsub_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubpd128_mask.  */
extern __inline __m128d 4177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4178 _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 4179 { 4180 return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A, 4181 (__v2df) __B, 4182 (__v2df) __C, 4183 (__mmask8) __U); 4184 } 4185 4186
/* _mm_mask3_fmsub_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubpd128_mask3.  */
extern __inline __m128d 4187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4188 _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C, 4189 __mmask8 __U) 4190 { 4191 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A, 4192 (__v2df) __B, 4193 (__v2df) __C, 4194 (__mmask8) __U); 4195 } 4196 4197
/* _mm_maskz_fmsub_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubpd128_maskz.  */
extern __inline __m128d 4198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4199 _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B, 4200 __m128d __C) 4201 { 4202 return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A, 4203 (__v2df) __B, 4204 (__v2df) __C, 4205 (__mmask8) __U); 4206 } 4207 4208
/* _mm256_mask_fmsub_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubps256_mask.  */
extern __inline __m256 4209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4210 _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 4211 { 4212 return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A, 4213 (__v8sf) __B, 4214 (__v8sf) __C, 4215 (__mmask8) __U); 4216 } 4217 4218
/* _mm256_mask3_fmsub_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubps256_mask3.  */
extern __inline __m256 4219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4220 _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C, 4221 __mmask8 __U) 4222 { 4223 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A, 4224 (__v8sf) __B, 4225 (__v8sf) __C, 4226 (__mmask8) __U); 4227 } 4228 4229
/* _mm256_maskz_fmsub_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubps256_maskz.  */
extern __inline __m256 4230 __attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) 4231 _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B, 4232 __m256 __C) 4233 { 4234 return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A, 4235 (__v8sf) __B, 4236 (__v8sf) __C, 4237 (__mmask8) __U); 4238 } 4239 4240
/* _mm_mask_fmsub_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubps128_mask.  */
extern __inline __m128 4241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4242 _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 4243 { 4244 return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A, 4245 (__v4sf) __B, 4246 (__v4sf) __C, 4247 (__mmask8) __U); 4248 } 4249 4250
/* _mm_mask3_fmsub_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubps128_mask3.  */
extern __inline __m128 4251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4252 _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 4253 { 4254 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A, 4255 (__v4sf) __B, 4256 (__v4sf) __C, 4257 (__mmask8) __U); 4258 } 4259 4260
/* _mm_maskz_fmsub_ps: passes __A, __B, __C and __U to
   __builtin_ia32_vfmsubps128_maskz.  */
extern __inline __m128 4261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4262 _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 4263 { 4264 return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A, 4265 (__v4sf) __B, 4266 (__v4sf) __C, 4267 (__mmask8) __U); 4268 } 4269 4270
/* _mm256_mask_fmaddsub_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddsubpd256_mask.  */
extern __inline __m256d 4271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4272 _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B, 4273 __m256d __C) 4274 { 4275 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, 4276 (__v4df) __B, 4277 (__v4df) __C, 4278 (__mmask8) __U); 4279 } 4280 4281
/* _mm256_mask3_fmaddsub_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddsubpd256_mask3.  */
extern __inline __m256d 4282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4283 _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C, 4284 __mmask8 __U) 4285 { 4286 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A, 4287 (__v4df) __B, 4288 (__v4df) __C, 4289 (__mmask8) 4290 __U); 4291 } 4292 4293
/* _mm256_maskz_fmaddsub_pd: passes __A, __B, __C and __U to
   __builtin_ia32_vfmaddsubpd256_maskz.  */
extern __inline __m256d 4294 __attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) 4295 _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B, 4296 __m256d __C) 4297 { 4298 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, 4299 (__v4df) __B, 4300 (__v4df) __C, 4301 (__mmask8) 4302 __U); 4303 } 4304 4305 extern __inline __m128d 4306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4307 _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B, 4308 __m128d __C) 4309 { 4310 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, 4311 (__v2df) __B, 4312 (__v2df) __C, 4313 (__mmask8) __U); 4314 } 4315 4316 extern __inline __m128d 4317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4318 _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C, 4319 __mmask8 __U) 4320 { 4321 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A, 4322 (__v2df) __B, 4323 (__v2df) __C, 4324 (__mmask8) 4325 __U); 4326 } 4327 4328 extern __inline __m128d 4329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4330 _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B, 4331 __m128d __C) 4332 { 4333 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, 4334 (__v2df) __B, 4335 (__v2df) __C, 4336 (__mmask8) 4337 __U); 4338 } 4339 4340 extern __inline __m256 4341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4342 _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B, 4343 __m256 __C) 4344 { 4345 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, 4346 (__v8sf) __B, 4347 (__v8sf) __C, 4348 (__mmask8) __U); 4349 } 4350 4351 extern __inline __m256 4352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4353 _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C, 4354 __mmask8 __U) 4355 { 4356 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A, 4357 (__v8sf) __B, 4358 (__v8sf) __C, 4359 (__mmask8) __U); 4360 } 4361 4362 extern __inline __m256 4363 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4364 _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B, 4365 __m256 __C) 4366 { 4367 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, 4368 (__v8sf) __B, 4369 (__v8sf) __C, 4370 (__mmask8) __U); 4371 } 4372 4373 extern __inline __m128 4374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4375 _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 4376 { 4377 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, 4378 (__v4sf) __B, 4379 (__v4sf) __C, 4380 (__mmask8) __U); 4381 } 4382 4383 extern __inline __m128 4384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4385 _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C, 4386 __mmask8 __U) 4387 { 4388 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A, 4389 (__v4sf) __B, 4390 (__v4sf) __C, 4391 (__mmask8) __U); 4392 } 4393 4394 extern __inline __m128 4395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4396 _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B, 4397 __m128 __C) 4398 { 4399 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, 4400 (__v4sf) __B, 4401 (__v4sf) __C, 4402 (__mmask8) __U); 4403 } 4404 4405 extern __inline __m256d 4406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4407 _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B, 4408 __m256d __C) 4409 { 4410 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, 4411 (__v4df) __B, 4412 -(__v4df) __C, 4413 (__mmask8) __U); 4414 } 4415 4416 extern __inline __m256d 4417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4418 _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C, 4419 __mmask8 __U) 4420 { 4421 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A, 4422 (__v4df) __B, 4423 (__v4df) __C, 4424 (__mmask8) 4425 __U); 4426 } 4427 4428 
extern __inline __m256d 4429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4430 _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B, 4431 __m256d __C) 4432 { 4433 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, 4434 (__v4df) __B, 4435 -(__v4df) __C, 4436 (__mmask8) 4437 __U); 4438 } 4439 4440 extern __inline __m128d 4441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4442 _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B, 4443 __m128d __C) 4444 { 4445 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, 4446 (__v2df) __B, 4447 -(__v2df) __C, 4448 (__mmask8) __U); 4449 } 4450 4451 extern __inline __m128d 4452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4453 _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C, 4454 __mmask8 __U) 4455 { 4456 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A, 4457 (__v2df) __B, 4458 (__v2df) __C, 4459 (__mmask8) 4460 __U); 4461 } 4462 4463 extern __inline __m128d 4464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4465 _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B, 4466 __m128d __C) 4467 { 4468 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, 4469 (__v2df) __B, 4470 -(__v2df) __C, 4471 (__mmask8) 4472 __U); 4473 } 4474 4475 extern __inline __m256 4476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4477 _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B, 4478 __m256 __C) 4479 { 4480 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, 4481 (__v8sf) __B, 4482 -(__v8sf) __C, 4483 (__mmask8) __U); 4484 } 4485 4486 extern __inline __m256 4487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4488 _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C, 4489 __mmask8 __U) 4490 { 4491 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A, 4492 (__v8sf) __B, 
4493 (__v8sf) __C, 4494 (__mmask8) __U); 4495 } 4496 4497 extern __inline __m256 4498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4499 _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B, 4500 __m256 __C) 4501 { 4502 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, 4503 (__v8sf) __B, 4504 -(__v8sf) __C, 4505 (__mmask8) __U); 4506 } 4507 4508 extern __inline __m128 4509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4510 _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 4511 { 4512 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, 4513 (__v4sf) __B, 4514 -(__v4sf) __C, 4515 (__mmask8) __U); 4516 } 4517 4518 extern __inline __m128 4519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4520 _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C, 4521 __mmask8 __U) 4522 { 4523 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A, 4524 (__v4sf) __B, 4525 (__v4sf) __C, 4526 (__mmask8) __U); 4527 } 4528 4529 extern __inline __m128 4530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4531 _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B, 4532 __m128 __C) 4533 { 4534 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, 4535 (__v4sf) __B, 4536 -(__v4sf) __C, 4537 (__mmask8) __U); 4538 } 4539 4540 extern __inline __m256d 4541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4542 _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B, 4543 __m256d __C) 4544 { 4545 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A, 4546 (__v4df) __B, 4547 (__v4df) __C, 4548 (__mmask8) __U); 4549 } 4550 4551 extern __inline __m256d 4552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4553 _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C, 4554 __mmask8 __U) 4555 { 4556 return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A, 
4557 (__v4df) __B, 4558 (__v4df) __C, 4559 (__mmask8) __U); 4560 } 4561 4562 extern __inline __m256d 4563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4564 _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B, 4565 __m256d __C) 4566 { 4567 return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A, 4568 (__v4df) __B, 4569 (__v4df) __C, 4570 (__mmask8) __U); 4571 } 4572 4573 extern __inline __m128d 4574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4575 _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, 4576 __m128d __C) 4577 { 4578 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A, 4579 (__v2df) __B, 4580 (__v2df) __C, 4581 (__mmask8) __U); 4582 } 4583 4584 extern __inline __m128d 4585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4586 _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C, 4587 __mmask8 __U) 4588 { 4589 return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A, 4590 (__v2df) __B, 4591 (__v2df) __C, 4592 (__mmask8) __U); 4593 } 4594 4595 extern __inline __m128d 4596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4597 _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B, 4598 __m128d __C) 4599 { 4600 return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A, 4601 (__v2df) __B, 4602 (__v2df) __C, 4603 (__mmask8) __U); 4604 } 4605 4606 extern __inline __m256 4607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4608 _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, 4609 __m256 __C) 4610 { 4611 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A, 4612 (__v8sf) __B, 4613 (__v8sf) __C, 4614 (__mmask8) __U); 4615 } 4616 4617 extern __inline __m256 4618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4619 _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C, 4620 __mmask8 __U) 4621 { 4622 return (__m256) __builtin_ia32_vfnmaddps256_mask3 
((__v8sf) __A, 4623 (__v8sf) __B, 4624 (__v8sf) __C, 4625 (__mmask8) __U); 4626 } 4627 4628 extern __inline __m256 4629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4630 _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B, 4631 __m256 __C) 4632 { 4633 return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A, 4634 (__v8sf) __B, 4635 (__v8sf) __C, 4636 (__mmask8) __U); 4637 } 4638 4639 extern __inline __m128 4640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4641 _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 4642 { 4643 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A, 4644 (__v4sf) __B, 4645 (__v4sf) __C, 4646 (__mmask8) __U); 4647 } 4648 4649 extern __inline __m128 4650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4651 _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 4652 { 4653 return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A, 4654 (__v4sf) __B, 4655 (__v4sf) __C, 4656 (__mmask8) __U); 4657 } 4658 4659 extern __inline __m128 4660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4661 _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 4662 { 4663 return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A, 4664 (__v4sf) __B, 4665 (__v4sf) __C, 4666 (__mmask8) __U); 4667 } 4668 4669 extern __inline __m256d 4670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4671 _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B, 4672 __m256d __C) 4673 { 4674 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A, 4675 (__v4df) __B, 4676 (__v4df) __C, 4677 (__mmask8) __U); 4678 } 4679 4680 extern __inline __m256d 4681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4682 _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C, 4683 __mmask8 __U) 4684 { 4685 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) 
__A, 4686 (__v4df) __B, 4687 (__v4df) __C, 4688 (__mmask8) __U); 4689 } 4690 4691 extern __inline __m256d 4692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4693 _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B, 4694 __m256d __C) 4695 { 4696 return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A, 4697 (__v4df) __B, 4698 (__v4df) __C, 4699 (__mmask8) __U); 4700 } 4701 4702 extern __inline __m128d 4703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4704 _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, 4705 __m128d __C) 4706 { 4707 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A, 4708 (__v2df) __B, 4709 (__v2df) __C, 4710 (__mmask8) __U); 4711 } 4712 4713 extern __inline __m128d 4714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4715 _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C, 4716 __mmask8 __U) 4717 { 4718 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A, 4719 (__v2df) __B, 4720 (__v2df) __C, 4721 (__mmask8) __U); 4722 } 4723 4724 extern __inline __m128d 4725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4726 _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B, 4727 __m128d __C) 4728 { 4729 return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A, 4730 (__v2df) __B, 4731 (__v2df) __C, 4732 (__mmask8) __U); 4733 } 4734 4735 extern __inline __m256 4736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4737 _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, 4738 __m256 __C) 4739 { 4740 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A, 4741 (__v8sf) __B, 4742 (__v8sf) __C, 4743 (__mmask8) __U); 4744 } 4745 4746 extern __inline __m256 4747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4748 _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C, 4749 __mmask8 __U) 4750 { 4751 return (__m256) 
__builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A, 4752 (__v8sf) __B, 4753 (__v8sf) __C, 4754 (__mmask8) __U); 4755 } 4756 4757 extern __inline __m256 4758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4759 _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B, 4760 __m256 __C) 4761 { 4762 return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A, 4763 (__v8sf) __B, 4764 (__v8sf) __C, 4765 (__mmask8) __U); 4766 } 4767 4768 extern __inline __m128 4769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4770 _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 4771 { 4772 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A, 4773 (__v4sf) __B, 4774 (__v4sf) __C, 4775 (__mmask8) __U); 4776 } 4777 4778 extern __inline __m128 4779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4780 _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 4781 { 4782 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A, 4783 (__v4sf) __B, 4784 (__v4sf) __C, 4785 (__mmask8) __U); 4786 } 4787 4788 extern __inline __m128 4789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4790 _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 4791 { 4792 return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A, 4793 (__v4sf) __B, 4794 (__v4sf) __C, 4795 (__mmask8) __U); 4796 } 4797 4798 extern __inline __m128i 4799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4800 _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 4801 __m128i __B) 4802 { 4803 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A, 4804 (__v4si) __B, 4805 (__v4si) __W, 4806 (__mmask8) __U); 4807 } 4808 4809 extern __inline __m128i 4810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4811 _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4812 { 4813 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) 
__A, 4814 (__v4si) __B, 4815 (__v4si) 4816 _mm_setzero_si128 (), 4817 (__mmask8) __U); 4818 } 4819 4820 extern __inline __m256i 4821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4822 _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 4823 __m256i __B) 4824 { 4825 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A, 4826 (__v8si) __B, 4827 (__v8si) __W, 4828 (__mmask8) __U); 4829 } 4830 4831 extern __inline __m256i 4832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4833 _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4834 { 4835 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A, 4836 (__v8si) __B, 4837 (__v8si) 4838 _mm256_setzero_si256 (), 4839 (__mmask8) __U); 4840 } 4841 4842 extern __inline __m128i 4843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4844 _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 4845 __m128i __B) 4846 { 4847 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A, 4848 (__v4si) __B, 4849 (__v4si) __W, 4850 (__mmask8) __U); 4851 } 4852 4853 extern __inline __m128i 4854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4855 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4856 { 4857 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A, 4858 (__v4si) __B, 4859 (__v4si) 4860 _mm_setzero_si128 (), 4861 (__mmask8) __U); 4862 } 4863 4864 extern __inline __m256i 4865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4866 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 4867 __m256i __B) 4868 { 4869 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A, 4870 (__v8si) __B, 4871 (__v8si) __W, 4872 (__mmask8) __U); 4873 } 4874 4875 extern __inline __m256i 4876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4877 _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4878 { 4879 return (__m256i) 
__builtin_ia32_pord256_mask ((__v8si) __A, 4880 (__v8si) __B, 4881 (__v8si) 4882 _mm256_setzero_si256 (), 4883 (__mmask8) __U); 4884 } 4885 4886 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 4887 _mm256_or_epi32 (__m256i __A, __m256i __B) 4888 { 4889 return (__m256i) ((__v8su)__A | (__v8su)__B); 4890 } 4891 4892 extern __inline __m128i 4893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4894 _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4895 { 4896 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A, 4897 (__v4si) __B, 4898 (__v4si) __W, 4899 (__mmask8) __U); 4900 } 4901 4902 extern __inline __m128i 4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4904 _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4905 { 4906 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A, 4907 (__v4si) __B, 4908 (__v4si) 4909 _mm_setzero_si128 (), 4910 (__mmask8) __U); 4911 } 4912 4913 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 4914 _mm_or_epi32 (__m128i __A, __m128i __B) 4915 { 4916 return (__m128i) ((__v4su)__A | (__v4su)__B); 4917 } 4918 4919 extern __inline __m256i 4920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4921 _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 4922 __m256i __B) 4923 { 4924 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A, 4925 (__v8si) __B, 4926 (__v8si) __W, 4927 (__mmask8) __U); 4928 } 4929 4930 extern __inline __m256i 4931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4932 _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4933 { 4934 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A, 4935 (__v8si) __B, 4936 (__v8si) 4937 _mm256_setzero_si256 (), 4938 (__mmask8) __U); 4939 } 4940 4941 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 4942 
_mm256_xor_epi32 (__m256i __A, __m256i __B) 4943 { 4944 return (__m256i) ((__v8su)__A ^ (__v8su)__B); 4945 } 4946 4947 extern __inline __m128i 4948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4949 _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 4950 __m128i __B) 4951 { 4952 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A, 4953 (__v4si) __B, 4954 (__v4si) __W, 4955 (__mmask8) __U); 4956 } 4957 4958 extern __inline __m128i 4959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4960 _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4961 { 4962 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A, 4963 (__v4si) __B, 4964 (__v4si) 4965 _mm_setzero_si128 (), 4966 (__mmask8) __U); 4967 } 4968 4969 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 4970 _mm_xor_epi32 (__m128i __A, __m128i __B) 4971 { 4972 return (__m128i) ((__v4su)__A ^ (__v4su)__B); 4973 } 4974 4975 extern __inline __m128 4976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4977 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) 4978 { 4979 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 4980 (__v4sf) __W, 4981 (__mmask8) __U); 4982 } 4983 4984 extern __inline __m128 4985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4986 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) 4987 { 4988 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 4989 (__v4sf) 4990 _mm_setzero_ps (), 4991 (__mmask8) __U); 4992 } 4993 4994 extern __inline __m128 4995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4996 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) 4997 { 4998 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 4999 (__v4sf) __W, 5000 (__mmask8) __U); 5001 } 5002 5003 extern __inline __m128 5004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5005 _mm256_maskz_cvtpd_ps 
(__mmask8 __U, __m256d __A) 5006 { 5007 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 5008 (__v4sf) 5009 _mm_setzero_ps (), 5010 (__mmask8) __U); 5011 } 5012 5013 extern __inline __m256i 5014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5015 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) 5016 { 5017 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 5018 (__v8si) __W, 5019 (__mmask8) __U); 5020 } 5021 5022 extern __inline __m256i 5023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5024 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) 5025 { 5026 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 5027 (__v8si) 5028 _mm256_setzero_si256 (), 5029 (__mmask8) __U); 5030 } 5031 5032 extern __inline __m128i 5033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5034 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) 5035 { 5036 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 5037 (__v4si) __W, 5038 (__mmask8) __U); 5039 } 5040 5041 extern __inline __m128i 5042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5043 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) 5044 { 5045 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 5046 (__v4si) 5047 _mm_setzero_si128 (), 5048 (__mmask8) __U); 5049 } 5050 5051 extern __inline __m256i 5052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5053 _mm256_cvtps_epu32 (__m256 __A) 5054 { 5055 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 5056 (__v8si) 5057 _mm256_setzero_si256 (), 5058 (__mmask8) -1); 5059 } 5060 5061 extern __inline __m256i 5062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5063 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) 5064 { 5065 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 5066 (__v8si) __W, 5067 (__mmask8) __U); 5068 } 5069 5070 extern 
__inline __m256i 5071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5072 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) 5073 { 5074 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 5075 (__v8si) 5076 _mm256_setzero_si256 (), 5077 (__mmask8) __U); 5078 } 5079 5080 extern __inline __m128i 5081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5082 _mm_cvtps_epu32 (__m128 __A) 5083 { 5084 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 5085 (__v4si) 5086 _mm_setzero_si128 (), 5087 (__mmask8) -1); 5088 } 5089 5090 extern __inline __m128i 5091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5092 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) 5093 { 5094 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 5095 (__v4si) __W, 5096 (__mmask8) __U); 5097 } 5098 5099 extern __inline __m128i 5100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5101 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) 5102 { 5103 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 5104 (__v4si) 5105 _mm_setzero_si128 (), 5106 (__mmask8) __U); 5107 } 5108 5109 extern __inline __m256d 5110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5111 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5112 { 5113 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A, 5114 (__v4df) __W, 5115 (__mmask8) __U); 5116 } 5117 5118 extern __inline __m256d 5119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5120 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5121 { 5122 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A, 5123 (__v4df) 5124 _mm256_setzero_pd (), 5125 (__mmask8) __U); 5126 } 5127 5128 extern __inline __m128d 5129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5130 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5131 { 5132 return (__m128d) 
__builtin_ia32_movddup128_mask ((__v2df) __A, 5133 (__v2df) __W, 5134 (__mmask8) __U); 5135 } 5136 5137 extern __inline __m128d 5138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5139 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5140 { 5141 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A, 5142 (__v2df) 5143 _mm_setzero_pd (), 5144 (__mmask8) __U); 5145 } 5146 5147 extern __inline __m256 5148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5149 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 5150 { 5151 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A, 5152 (__v8sf) __W, 5153 (__mmask8) __U); 5154 } 5155 5156 extern __inline __m256 5157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5158 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 5159 { 5160 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A, 5161 (__v8sf) 5162 _mm256_setzero_ps (), 5163 (__mmask8) __U); 5164 } 5165 5166 extern __inline __m128 5167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5168 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 5169 { 5170 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A, 5171 (__v4sf) __W, 5172 (__mmask8) __U); 5173 } 5174 5175 extern __inline __m128 5176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5177 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 5178 { 5179 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A, 5180 (__v4sf) 5181 _mm_setzero_ps (), 5182 (__mmask8) __U); 5183 } 5184 5185 extern __inline __m256 5186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5187 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 5188 { 5189 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A, 5190 (__v8sf) __W, 5191 (__mmask8) __U); 5192 } 5193 5194 extern __inline __m256 5195 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 5196 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 5197 { 5198 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A, 5199 (__v8sf) 5200 _mm256_setzero_ps (), 5201 (__mmask8) __U); 5202 } 5203 5204 extern __inline __m128 5205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5206 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 5207 { 5208 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A, 5209 (__v4sf) __W, 5210 (__mmask8) __U); 5211 } 5212 5213 extern __inline __m128 5214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5215 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 5216 { 5217 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A, 5218 (__v4sf) 5219 _mm_setzero_ps (), 5220 (__mmask8) __U); 5221 } 5222 5223 extern __inline __m128i 5224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5225 _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 5226 __m128i __B) 5227 { 5228 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A, 5229 (__v4si) __B, 5230 (__v4si) __W, 5231 (__mmask8) __U); 5232 } 5233 5234 extern __inline __m128i 5235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5236 _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 5237 { 5238 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A, 5239 (__v4si) __B, 5240 (__v4si) 5241 _mm_setzero_si128 (), 5242 (__mmask8) __U); 5243 } 5244 5245 extern __inline __m256i 5246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5247 _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 5248 __m256i __B) 5249 { 5250 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A, 5251 (__v8si) __B, 5252 (__v8si) __W, 5253 (__mmask8) __U); 5254 } 5255 5256 extern __inline __m256i 5257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5258 _mm256_maskz_unpackhi_epi32 (__mmask8 __U, 
__m256i __A, __m256i __B) 5259 { 5260 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A, 5261 (__v8si) __B, 5262 (__v8si) 5263 _mm256_setzero_si256 (), 5264 (__mmask8) __U); 5265 } 5266 5267 extern __inline __m128i 5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5269 _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 5270 __m128i __B) 5271 { 5272 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A, 5273 (__v2di) __B, 5274 (__v2di) __W, 5275 (__mmask8) __U); 5276 } 5277 5278 extern __inline __m128i 5279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5280 _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 5281 { 5282 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A, 5283 (__v2di) __B, 5284 (__v2di) 5285 _mm_setzero_si128 (), 5286 (__mmask8) __U); 5287 } 5288 5289 extern __inline __m256i 5290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5291 _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 5292 __m256i __B) 5293 { 5294 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A, 5295 (__v4di) __B, 5296 (__v4di) __W, 5297 (__mmask8) __U); 5298 } 5299 5300 extern __inline __m256i 5301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5302 _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 5303 { 5304 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A, 5305 (__v4di) __B, 5306 (__v4di) 5307 _mm256_setzero_si256 (), 5308 (__mmask8) __U); 5309 } 5310 5311 extern __inline __m128i 5312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5313 _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 5314 __m128i __B) 5315 { 5316 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A, 5317 (__v4si) __B, 5318 (__v4si) __W, 5319 (__mmask8) __U); 5320 } 5321 5322 extern __inline __m128i 5323 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 5324 _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 5325 { 5326 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A, 5327 (__v4si) __B, 5328 (__v4si) 5329 _mm_setzero_si128 (), 5330 (__mmask8) __U); 5331 } 5332 5333 extern __inline __m256i 5334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5335 _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 5336 __m256i __B) 5337 { 5338 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A, 5339 (__v8si) __B, 5340 (__v8si) __W, 5341 (__mmask8) __U); 5342 } 5343 5344 extern __inline __m256i 5345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5346 _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 5347 { 5348 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A, 5349 (__v8si) __B, 5350 (__v8si) 5351 _mm256_setzero_si256 (), 5352 (__mmask8) __U); 5353 } 5354 5355 extern __inline __m128i 5356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5357 _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 5358 __m128i __B) 5359 { 5360 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A, 5361 (__v2di) __B, 5362 (__v2di) __W, 5363 (__mmask8) __U); 5364 } 5365 5366 extern __inline __m128i 5367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5368 _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 5369 { 5370 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A, 5371 (__v2di) __B, 5372 (__v2di) 5373 _mm_setzero_si128 (), 5374 (__mmask8) __U); 5375 } 5376 5377 extern __inline __m256i 5378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5379 _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 5380 __m256i __B) 5381 { 5382 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A, 5383 (__v4di) __B, 5384 (__v4di) __W, 5385 (__mmask8) __U); 5386 } 5387 
5388 extern __inline __m256i 5389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5390 _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 5391 { 5392 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A, 5393 (__v4di) __B, 5394 (__v4di) 5395 _mm256_setzero_si256 (), 5396 (__mmask8) __U); 5397 } 5398 5399 extern __inline __mmask8 5400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5401 _mm_cmpeq_epu32_mask (__m128i __A, __m128i __B) 5402 { 5403 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A, 5404 (__v4si) __B, 0, 5405 (__mmask8) -1); 5406 } 5407 5408 extern __inline __mmask8 5409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5410 _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B) 5411 { 5412 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A, 5413 (__v4si) __B, 5414 (__mmask8) -1); 5415 } 5416 5417 extern __inline __mmask8 5418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5419 _mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B) 5420 { 5421 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A, 5422 (__v4si) __B, 0, __U); 5423 } 5424 5425 extern __inline __mmask8 5426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5427 _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 5428 { 5429 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A, 5430 (__v4si) __B, __U); 5431 } 5432 5433 extern __inline __mmask8 5434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5435 _mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B) 5436 { 5437 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A, 5438 (__v8si) __B, 0, 5439 (__mmask8) -1); 5440 } 5441 5442 extern __inline __mmask8 5443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5444 _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B) 5445 { 5446 return (__mmask8) 
__builtin_ia32_pcmpeqd256_mask ((__v8si) __A, 5447 (__v8si) __B, 5448 (__mmask8) -1); 5449 } 5450 5451 extern __inline __mmask8 5452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5453 _mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B) 5454 { 5455 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A, 5456 (__v8si) __B, 0, __U); 5457 } 5458 5459 extern __inline __mmask8 5460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5461 _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 5462 { 5463 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A, 5464 (__v8si) __B, __U); 5465 } 5466 5467 extern __inline __mmask8 5468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5469 _mm_cmpeq_epu64_mask (__m128i __A, __m128i __B) 5470 { 5471 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A, 5472 (__v2di) __B, 0, 5473 (__mmask8) -1); 5474 } 5475 5476 extern __inline __mmask8 5477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5478 _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B) 5479 { 5480 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A, 5481 (__v2di) __B, 5482 (__mmask8) -1); 5483 } 5484 5485 extern __inline __mmask8 5486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5487 _mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B) 5488 { 5489 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A, 5490 (__v2di) __B, 0, __U); 5491 } 5492 5493 extern __inline __mmask8 5494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5495 _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 5496 { 5497 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A, 5498 (__v2di) __B, __U); 5499 } 5500 5501 extern __inline __mmask8 5502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5503 _mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B) 5504 { 5505 
return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A, 5506 (__v4di) __B, 0, 5507 (__mmask8) -1); 5508 } 5509 5510 extern __inline __mmask8 5511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5512 _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B) 5513 { 5514 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A, 5515 (__v4di) __B, 5516 (__mmask8) -1); 5517 } 5518 5519 extern __inline __mmask8 5520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5521 _mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B) 5522 { 5523 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A, 5524 (__v4di) __B, 0, __U); 5525 } 5526 5527 extern __inline __mmask8 5528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5529 _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 5530 { 5531 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A, 5532 (__v4di) __B, __U); 5533 } 5534 5535 extern __inline __mmask8 5536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5537 _mm_cmpgt_epu32_mask (__m128i __A, __m128i __B) 5538 { 5539 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A, 5540 (__v4si) __B, 6, 5541 (__mmask8) -1); 5542 } 5543 5544 extern __inline __mmask8 5545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5546 _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B) 5547 { 5548 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A, 5549 (__v4si) __B, 5550 (__mmask8) -1); 5551 } 5552 5553 extern __inline __mmask8 5554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5555 _mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B) 5556 { 5557 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A, 5558 (__v4si) __B, 6, __U); 5559 } 5560 5561 extern __inline __mmask8 5562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5563 _mm_mask_cmpgt_epi32_mask (__mmask8 __U, 
__m128i __A, __m128i __B) 5564 { 5565 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A, 5566 (__v4si) __B, __U); 5567 } 5568 5569 extern __inline __mmask8 5570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5571 _mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B) 5572 { 5573 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A, 5574 (__v8si) __B, 6, 5575 (__mmask8) -1); 5576 } 5577 5578 extern __inline __mmask8 5579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5580 _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B) 5581 { 5582 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A, 5583 (__v8si) __B, 5584 (__mmask8) -1); 5585 } 5586 5587 extern __inline __mmask8 5588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5589 _mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B) 5590 { 5591 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A, 5592 (__v8si) __B, 6, __U); 5593 } 5594 5595 extern __inline __mmask8 5596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5597 _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 5598 { 5599 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A, 5600 (__v8si) __B, __U); 5601 } 5602 5603 extern __inline __mmask8 5604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5605 _mm_cmpgt_epu64_mask (__m128i __A, __m128i __B) 5606 { 5607 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A, 5608 (__v2di) __B, 6, 5609 (__mmask8) -1); 5610 } 5611 5612 extern __inline __mmask8 5613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5614 _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B) 5615 { 5616 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A, 5617 (__v2di) __B, 5618 (__mmask8) -1); 5619 } 5620 5621 extern __inline __mmask8 5622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5623 _mm_mask_cmpgt_epu64_mask 
(__mmask8 __U, __m128i __A, __m128i __B) 5624 { 5625 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A, 5626 (__v2di) __B, 6, __U); 5627 } 5628 5629 extern __inline __mmask8 5630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5631 _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 5632 { 5633 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A, 5634 (__v2di) __B, __U); 5635 } 5636 5637 extern __inline __mmask8 5638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5639 _mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B) 5640 { 5641 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A, 5642 (__v4di) __B, 6, 5643 (__mmask8) -1); 5644 } 5645 5646 extern __inline __mmask8 5647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5648 _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B) 5649 { 5650 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A, 5651 (__v4di) __B, 5652 (__mmask8) -1); 5653 } 5654 5655 extern __inline __mmask8 5656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5657 _mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B) 5658 { 5659 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A, 5660 (__v4di) __B, 6, __U); 5661 } 5662 5663 extern __inline __mmask8 5664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5665 _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 5666 { 5667 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A, 5668 (__v4di) __B, __U); 5669 } 5670 5671 extern __inline __mmask8 5672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5673 _mm_test_epi32_mask (__m128i __A, __m128i __B) 5674 { 5675 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 5676 (__v4si) __B, 5677 (__mmask8) -1); 5678 } 5679 5680 extern __inline __mmask8 5681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5682 
_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 5683 { 5684 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 5685 (__v4si) __B, __U); 5686 } 5687 5688 extern __inline __mmask8 5689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5690 _mm256_test_epi32_mask (__m256i __A, __m256i __B) 5691 { 5692 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 5693 (__v8si) __B, 5694 (__mmask8) -1); 5695 } 5696 5697 extern __inline __mmask8 5698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5699 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 5700 { 5701 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 5702 (__v8si) __B, __U); 5703 } 5704 5705 extern __inline __mmask8 5706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5707 _mm_test_epi64_mask (__m128i __A, __m128i __B) 5708 { 5709 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 5710 (__v2di) __B, 5711 (__mmask8) -1); 5712 } 5713 5714 extern __inline __mmask8 5715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5716 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 5717 { 5718 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 5719 (__v2di) __B, __U); 5720 } 5721 5722 extern __inline __mmask8 5723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5724 _mm256_test_epi64_mask (__m256i __A, __m256i __B) 5725 { 5726 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 5727 (__v4di) __B, 5728 (__mmask8) -1); 5729 } 5730 5731 extern __inline __mmask8 5732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5733 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 5734 { 5735 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 5736 (__v4di) __B, __U); 5737 } 5738 5739 extern __inline __mmask8 5740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5741 _mm_testn_epi32_mask 
(__m128i __A, __m128i __B) 5742 { 5743 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 5744 (__v4si) __B, 5745 (__mmask8) -1); 5746 } 5747 5748 extern __inline __mmask8 5749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5750 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 5751 { 5752 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 5753 (__v4si) __B, __U); 5754 } 5755 5756 extern __inline __mmask8 5757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5758 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) 5759 { 5760 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 5761 (__v8si) __B, 5762 (__mmask8) -1); 5763 } 5764 5765 extern __inline __mmask8 5766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5767 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 5768 { 5769 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 5770 (__v8si) __B, __U); 5771 } 5772 5773 extern __inline __mmask8 5774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5775 _mm_testn_epi64_mask (__m128i __A, __m128i __B) 5776 { 5777 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 5778 (__v2di) __B, 5779 (__mmask8) -1); 5780 } 5781 5782 extern __inline __mmask8 5783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5784 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 5785 { 5786 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 5787 (__v2di) __B, __U); 5788 } 5789 5790 extern __inline __mmask8 5791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5792 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) 5793 { 5794 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 5795 (__v4di) __B, 5796 (__mmask8) -1); 5797 } 5798 5799 extern __inline __mmask8 5800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5801 _mm256_mask_testn_epi64_mask (__mmask8 
__U, __m256i __A, __m256i __B) 5802 { 5803 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 5804 (__v4di) __B, __U); 5805 } 5806 5807 extern __inline __m256d 5808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5809 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) 5810 { 5811 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 5812 (__v4df) __W, 5813 (__mmask8) __U); 5814 } 5815 5816 extern __inline __m256d 5817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5818 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) 5819 { 5820 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 5821 (__v4df) 5822 _mm256_setzero_pd (), 5823 (__mmask8) __U); 5824 } 5825 5826 extern __inline void 5827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5828 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) 5829 { 5830 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 5831 (__v4df) __A, 5832 (__mmask8) __U); 5833 } 5834 5835 extern __inline __m128d 5836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5837 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) 5838 { 5839 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 5840 (__v2df) __W, 5841 (__mmask8) __U); 5842 } 5843 5844 extern __inline __m128d 5845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5846 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) 5847 { 5848 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 5849 (__v2df) 5850 _mm_setzero_pd (), 5851 (__mmask8) __U); 5852 } 5853 5854 extern __inline void 5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5856 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) 5857 { 5858 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 5859 (__v2df) __A, 5860 (__mmask8) __U); 5861 } 5862 5863 extern __inline __m256 5864 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 5865 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) 5866 { 5867 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 5868 (__v8sf) __W, 5869 (__mmask8) __U); 5870 } 5871 5872 extern __inline __m256 5873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5874 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) 5875 { 5876 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 5877 (__v8sf) 5878 _mm256_setzero_ps (), 5879 (__mmask8) __U); 5880 } 5881 5882 extern __inline void 5883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5884 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) 5885 { 5886 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 5887 (__v8sf) __A, 5888 (__mmask8) __U); 5889 } 5890 5891 extern __inline __m128 5892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5893 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) 5894 { 5895 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 5896 (__v4sf) __W, 5897 (__mmask8) __U); 5898 } 5899 5900 extern __inline __m128 5901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5902 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) 5903 { 5904 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 5905 (__v4sf) 5906 _mm_setzero_ps (), 5907 (__mmask8) __U); 5908 } 5909 5910 extern __inline void 5911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5912 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) 5913 { 5914 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 5915 (__v4sf) __A, 5916 (__mmask8) __U); 5917 } 5918 5919 extern __inline __m256i 5920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5921 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5922 { 5923 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 
5924 (__v4di) __W, 5925 (__mmask8) __U); 5926 } 5927 5928 extern __inline __m256i 5929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5930 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) 5931 { 5932 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 5933 (__v4di) 5934 _mm256_setzero_si256 (), 5935 (__mmask8) __U); 5936 } 5937 5938 extern __inline void 5939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5940 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) 5941 { 5942 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 5943 (__v4di) __A, 5944 (__mmask8) __U); 5945 } 5946 5947 extern __inline __m128i 5948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5949 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5950 { 5951 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 5952 (__v2di) __W, 5953 (__mmask8) __U); 5954 } 5955 5956 extern __inline __m128i 5957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5958 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) 5959 { 5960 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 5961 (__v2di) 5962 _mm_setzero_si128 (), 5963 (__mmask8) __U); 5964 } 5965 5966 extern __inline void 5967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5968 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) 5969 { 5970 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 5971 (__v2di) __A, 5972 (__mmask8) __U); 5973 } 5974 5975 extern __inline __m256i 5976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5977 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 5978 { 5979 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 5980 (__v8si) __W, 5981 (__mmask8) __U); 5982 } 5983 5984 extern __inline __m256i 5985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5986 
_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) 5987 { 5988 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 5989 (__v8si) 5990 _mm256_setzero_si256 (), 5991 (__mmask8) __U); 5992 } 5993 5994 extern __inline void 5995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5996 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) 5997 { 5998 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 5999 (__v8si) __A, 6000 (__mmask8) __U); 6001 } 6002 6003 extern __inline __m128i 6004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6005 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 6006 { 6007 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 6008 (__v4si) __W, 6009 (__mmask8) __U); 6010 } 6011 6012 extern __inline __m128i 6013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6014 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) 6015 { 6016 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 6017 (__v4si) 6018 _mm_setzero_si128 (), 6019 (__mmask8) __U); 6020 } 6021 6022 extern __inline void 6023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6024 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) 6025 { 6026 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 6027 (__v4si) __A, 6028 (__mmask8) __U); 6029 } 6030 6031 extern __inline __m256d 6032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6033 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) 6034 { 6035 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 6036 (__v4df) __W, 6037 (__mmask8) __U); 6038 } 6039 6040 extern __inline __m256d 6041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6042 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) 6043 { 6044 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A, 6045 (__v4df) 6046 _mm256_setzero_pd (), 6047 
(__mmask8) __U); 6048 } 6049 6050 extern __inline __m256d 6051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6052 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) 6053 { 6054 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 6055 (__v4df) __W, 6056 (__mmask8) 6057 __U); 6058 } 6059 6060 extern __inline __m256d 6061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6062 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) 6063 { 6064 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P, 6065 (__v4df) 6066 _mm256_setzero_pd (), 6067 (__mmask8) 6068 __U); 6069 } 6070 6071 extern __inline __m128d 6072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6073 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) 6074 { 6075 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 6076 (__v2df) __W, 6077 (__mmask8) __U); 6078 } 6079 6080 extern __inline __m128d 6081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6082 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) 6083 { 6084 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A, 6085 (__v2df) 6086 _mm_setzero_pd (), 6087 (__mmask8) __U); 6088 } 6089 6090 extern __inline __m128d 6091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6092 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) 6093 { 6094 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 6095 (__v2df) __W, 6096 (__mmask8) 6097 __U); 6098 } 6099 6100 extern __inline __m128d 6101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6102 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) 6103 { 6104 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P, 6105 (__v2df) 6106 _mm_setzero_pd (), 6107 (__mmask8) 6108 __U); 6109 } 6110 6111 extern __inline __m256 6112 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 6113 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) 6114 { 6115 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 6116 (__v8sf) __W, 6117 (__mmask8) __U); 6118 } 6119 6120 extern __inline __m256 6121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6122 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) 6123 { 6124 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A, 6125 (__v8sf) 6126 _mm256_setzero_ps (), 6127 (__mmask8) __U); 6128 } 6129 6130 extern __inline __m256 6131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6132 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) 6133 { 6134 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 6135 (__v8sf) __W, 6136 (__mmask8) __U); 6137 } 6138 6139 extern __inline __m256 6140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6141 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) 6142 { 6143 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P, 6144 (__v8sf) 6145 _mm256_setzero_ps (), 6146 (__mmask8) 6147 __U); 6148 } 6149 6150 extern __inline __m128 6151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6152 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) 6153 { 6154 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 6155 (__v4sf) __W, 6156 (__mmask8) __U); 6157 } 6158 6159 extern __inline __m128 6160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6161 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) 6162 { 6163 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A, 6164 (__v4sf) 6165 _mm_setzero_ps (), 6166 (__mmask8) __U); 6167 } 6168 6169 extern __inline __m128 6170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6171 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) 6172 { 6173 return (__m128) 
__builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 6174 (__v4sf) __W, 6175 (__mmask8) __U); 6176 } 6177 6178 extern __inline __m128 6179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6180 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) 6181 { 6182 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P, 6183 (__v4sf) 6184 _mm_setzero_ps (), 6185 (__mmask8) 6186 __U); 6187 } 6188 6189 extern __inline __m256i 6190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6191 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 6192 { 6193 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 6194 (__v4di) __W, 6195 (__mmask8) __U); 6196 } 6197 6198 extern __inline __m256i 6199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6200 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) 6201 { 6202 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A, 6203 (__v4di) 6204 _mm256_setzero_si256 (), 6205 (__mmask8) __U); 6206 } 6207 6208 extern __inline __m256i 6209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6210 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 6211 void const *__P) 6212 { 6213 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 6214 (__v4di) __W, 6215 (__mmask8) 6216 __U); 6217 } 6218 6219 extern __inline __m256i 6220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6221 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) 6222 { 6223 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P, 6224 (__v4di) 6225 _mm256_setzero_si256 (), 6226 (__mmask8) 6227 __U); 6228 } 6229 6230 extern __inline __m128i 6231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6232 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 6233 { 6234 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 6235 (__v2di) __W, 6236 (__mmask8) __U); 
6237 } 6238 6239 extern __inline __m128i 6240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6241 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) 6242 { 6243 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A, 6244 (__v2di) 6245 _mm_setzero_si128 (), 6246 (__mmask8) __U); 6247 } 6248 6249 extern __inline __m128i 6250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6251 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 6252 { 6253 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 6254 (__v2di) __W, 6255 (__mmask8) 6256 __U); 6257 } 6258 6259 extern __inline __m128i 6260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6261 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) 6262 { 6263 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P, 6264 (__v2di) 6265 _mm_setzero_si128 (), 6266 (__mmask8) 6267 __U); 6268 } 6269 6270 extern __inline __m256i 6271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6272 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 6273 { 6274 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 6275 (__v8si) __W, 6276 (__mmask8) __U); 6277 } 6278 6279 extern __inline __m256i 6280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6281 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) 6282 { 6283 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A, 6284 (__v8si) 6285 _mm256_setzero_si256 (), 6286 (__mmask8) __U); 6287 } 6288 6289 extern __inline __m256i 6290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6291 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 6292 void const *__P) 6293 { 6294 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 6295 (__v8si) __W, 6296 (__mmask8) 6297 __U); 6298 } 6299 6300 extern __inline __m256i 6301 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 6302 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) 6303 { 6304 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P, 6305 (__v8si) 6306 _mm256_setzero_si256 (), 6307 (__mmask8) 6308 __U); 6309 } 6310 6311 extern __inline __m128i 6312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6313 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 6314 { 6315 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 6316 (__v4si) __W, 6317 (__mmask8) __U); 6318 } 6319 6320 extern __inline __m128i 6321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6322 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) 6323 { 6324 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A, 6325 (__v4si) 6326 _mm_setzero_si128 (), 6327 (__mmask8) __U); 6328 } 6329 6330 extern __inline __m128i 6331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6332 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 6333 { 6334 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 6335 (__v4si) __W, 6336 (__mmask8) 6337 __U); 6338 } 6339 6340 extern __inline __m128i 6341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6342 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) 6343 { 6344 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P, 6345 (__v4si) 6346 _mm_setzero_si128 (), 6347 (__mmask8) 6348 __U); 6349 } 6350 6351 extern __inline __m256d 6352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6353 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) 6354 { 6355 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 6356 /* idx */ , 6357 (__v4df) __A, 6358 (__v4df) __B, 6359 (__mmask8) -1); 6360 } 6361 6362 extern __inline __m256d 6363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6364 _mm256_mask_permutex2var_pd 
(__m256d __A, __mmask8 __U, __m256i __I, 6365 __m256d __B) 6366 { 6367 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 6368 /* idx */ , 6369 (__v4df) __A, 6370 (__v4df) __B, 6371 (__mmask8) 6372 __U); 6373 } 6374 6375 extern __inline __m256d 6376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6377 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U, 6378 __m256d __B) 6379 { 6380 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A, 6381 (__v4di) __I 6382 /* idx */ , 6383 (__v4df) __B, 6384 (__mmask8) 6385 __U); 6386 } 6387 6388 extern __inline __m256d 6389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6390 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I, 6391 __m256d __B) 6392 { 6393 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I 6394 /* idx */ , 6395 (__v4df) __A, 6396 (__v4df) __B, 6397 (__mmask8) 6398 __U); 6399 } 6400 6401 extern __inline __m256 6402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6403 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) 6404 { 6405 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 6406 /* idx */ , 6407 (__v8sf) __A, 6408 (__v8sf) __B, 6409 (__mmask8) -1); 6410 } 6411 6412 extern __inline __m256 6413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6414 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I, 6415 __m256 __B) 6416 { 6417 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 6418 /* idx */ , 6419 (__v8sf) __A, 6420 (__v8sf) __B, 6421 (__mmask8) __U); 6422 } 6423 6424 extern __inline __m256 6425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6426 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U, 6427 __m256 __B) 6428 { 6429 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A, 6430 (__v8si) __I 6431 /* idx */ , 6432 (__v8sf) __B, 6433 
(__mmask8) __U); 6434 } 6435 6436 extern __inline __m256 6437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6438 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I, 6439 __m256 __B) 6440 { 6441 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I 6442 /* idx */ , 6443 (__v8sf) __A, 6444 (__v8sf) __B, 6445 (__mmask8) 6446 __U); 6447 } 6448 6449 extern __inline __m128i 6450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6451 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) 6452 { 6453 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 6454 /* idx */ , 6455 (__v2di) __A, 6456 (__v2di) __B, 6457 (__mmask8) -1); 6458 } 6459 6460 extern __inline __m128i 6461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6462 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I, 6463 __m128i __B) 6464 { 6465 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 6466 /* idx */ , 6467 (__v2di) __A, 6468 (__v2di) __B, 6469 (__mmask8) __U); 6470 } 6471 6472 extern __inline __m128i 6473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6474 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U, 6475 __m128i __B) 6476 { 6477 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A, 6478 (__v2di) __I 6479 /* idx */ , 6480 (__v2di) __B, 6481 (__mmask8) __U); 6482 } 6483 6484 extern __inline __m128i 6485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6486 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I, 6487 __m128i __B) 6488 { 6489 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I 6490 /* idx */ , 6491 (__v2di) __A, 6492 (__v2di) __B, 6493 (__mmask8) 6494 __U); 6495 } 6496 6497 extern __inline __m128i 6498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6499 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) 6500 { 
6501 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 6502 /* idx */ , 6503 (__v4si) __A, 6504 (__v4si) __B, 6505 (__mmask8) -1); 6506 } 6507 6508 extern __inline __m128i 6509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6510 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I, 6511 __m128i __B) 6512 { 6513 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 6514 /* idx */ , 6515 (__v4si) __A, 6516 (__v4si) __B, 6517 (__mmask8) __U); 6518 } 6519 6520 extern __inline __m128i 6521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6522 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U, 6523 __m128i __B) 6524 { 6525 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A, 6526 (__v4si) __I 6527 /* idx */ , 6528 (__v4si) __B, 6529 (__mmask8) __U); 6530 } 6531 6532 extern __inline __m128i 6533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6534 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I, 6535 __m128i __B) 6536 { 6537 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I 6538 /* idx */ , 6539 (__v4si) __A, 6540 (__v4si) __B, 6541 (__mmask8) 6542 __U); 6543 } 6544 6545 extern __inline __m256i 6546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6547 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) 6548 { 6549 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 6550 /* idx */ , 6551 (__v4di) __A, 6552 (__v4di) __B, 6553 (__mmask8) -1); 6554 } 6555 6556 extern __inline __m256i 6557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6558 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I, 6559 __m256i __B) 6560 { 6561 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 6562 /* idx */ , 6563 (__v4di) __A, 6564 (__v4di) __B, 6565 (__mmask8) __U); 6566 } 6567 6568 extern __inline __m256i 6569 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6570 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I, 6571 __mmask8 __U, __m256i __B) 6572 { 6573 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A, 6574 (__v4di) __I 6575 /* idx */ , 6576 (__v4di) __B, 6577 (__mmask8) __U); 6578 } 6579 6580 extern __inline __m256i 6581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6582 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A, 6583 __m256i __I, __m256i __B) 6584 { 6585 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I 6586 /* idx */ , 6587 (__v4di) __A, 6588 (__v4di) __B, 6589 (__mmask8) 6590 __U); 6591 } 6592 6593 extern __inline __m256i 6594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6595 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) 6596 { 6597 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 6598 /* idx */ , 6599 (__v8si) __A, 6600 (__v8si) __B, 6601 (__mmask8) -1); 6602 } 6603 6604 extern __inline __m256i 6605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6606 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I, 6607 __m256i __B) 6608 { 6609 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 6610 /* idx */ , 6611 (__v8si) __A, 6612 (__v8si) __B, 6613 (__mmask8) __U); 6614 } 6615 6616 extern __inline __m256i 6617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6618 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I, 6619 __mmask8 __U, __m256i __B) 6620 { 6621 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A, 6622 (__v8si) __I 6623 /* idx */ , 6624 (__v8si) __B, 6625 (__mmask8) __U); 6626 } 6627 6628 extern __inline __m256i 6629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6630 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A, 6631 __m256i __I, __m256i __B) 6632 { 6633 return (__m256i) 
__builtin_ia32_vpermt2vard256_maskz ((__v8si) __I 6634 /* idx */ , 6635 (__v8si) __A, 6636 (__v8si) __B, 6637 (__mmask8) 6638 __U); 6639 } 6640 6641 extern __inline __m128d 6642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6643 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) 6644 { 6645 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 6646 /* idx */ , 6647 (__v2df) __A, 6648 (__v2df) __B, 6649 (__mmask8) -1); 6650 } 6651 6652 extern __inline __m128d 6653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6654 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I, 6655 __m128d __B) 6656 { 6657 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 6658 /* idx */ , 6659 (__v2df) __A, 6660 (__v2df) __B, 6661 (__mmask8) 6662 __U); 6663 } 6664 6665 extern __inline __m128d 6666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6667 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U, 6668 __m128d __B) 6669 { 6670 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A, 6671 (__v2di) __I 6672 /* idx */ , 6673 (__v2df) __B, 6674 (__mmask8) 6675 __U); 6676 } 6677 6678 extern __inline __m128d 6679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6680 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I, 6681 __m128d __B) 6682 { 6683 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I 6684 /* idx */ , 6685 (__v2df) __A, 6686 (__v2df) __B, 6687 (__mmask8) 6688 __U); 6689 } 6690 6691 extern __inline __m128 6692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6693 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) 6694 { 6695 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 6696 /* idx */ , 6697 (__v4sf) __A, 6698 (__v4sf) __B, 6699 (__mmask8) -1); 6700 } 6701 6702 extern __inline __m128 6703 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 6704 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I, 6705 __m128 __B) 6706 { 6707 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 6708 /* idx */ , 6709 (__v4sf) __A, 6710 (__v4sf) __B, 6711 (__mmask8) __U); 6712 } 6713 6714 extern __inline __m128 6715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6716 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U, 6717 __m128 __B) 6718 { 6719 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A, 6720 (__v4si) __I 6721 /* idx */ , 6722 (__v4sf) __B, 6723 (__mmask8) __U); 6724 } 6725 6726 extern __inline __m128 6727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6728 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I, 6729 __m128 __B) 6730 { 6731 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I 6732 /* idx */ , 6733 (__v4sf) __A, 6734 (__v4sf) __B, 6735 (__mmask8) 6736 __U); 6737 } 6738 6739 extern __inline __m128i 6740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6741 _mm_srav_epi64 (__m128i __X, __m128i __Y) 6742 { 6743 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, 6744 (__v2di) __Y, 6745 (__v2di) 6746 _mm_setzero_si128 (), 6747 (__mmask8) -1); 6748 } 6749 6750 extern __inline __m128i 6751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6752 _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X, 6753 __m128i __Y) 6754 { 6755 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, 6756 (__v2di) __Y, 6757 (__v2di) __W, 6758 (__mmask8) __U); 6759 } 6760 6761 extern __inline __m128i 6762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6763 _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) 6764 { 6765 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, 6766 (__v2di) __Y, 6767 (__v2di) 6768 _mm_setzero_si128 (), 6769 (__mmask8) __U); 6770 } 6771 6772 extern __inline 
__m256i 6773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6774 _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X, 6775 __m256i __Y) 6776 { 6777 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X, 6778 (__v8si) __Y, 6779 (__v8si) __W, 6780 (__mmask8) __U); 6781 } 6782 6783 extern __inline __m256i 6784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6785 _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y) 6786 { 6787 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X, 6788 (__v8si) __Y, 6789 (__v8si) 6790 _mm256_setzero_si256 (), 6791 (__mmask8) __U); 6792 } 6793 6794 extern __inline __m128i 6795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6796 _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X, 6797 __m128i __Y) 6798 { 6799 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X, 6800 (__v4si) __Y, 6801 (__v4si) __W, 6802 (__mmask8) __U); 6803 } 6804 6805 extern __inline __m128i 6806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6807 _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y) 6808 { 6809 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X, 6810 (__v4si) __Y, 6811 (__v4si) 6812 _mm_setzero_si128 (), 6813 (__mmask8) __U); 6814 } 6815 6816 extern __inline __m256i 6817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6818 _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X, 6819 __m256i __Y) 6820 { 6821 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X, 6822 (__v4di) __Y, 6823 (__v4di) __W, 6824 (__mmask8) __U); 6825 } 6826 6827 extern __inline __m256i 6828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6829 _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 6830 { 6831 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X, 6832 (__v4di) __Y, 6833 (__v4di) 6834 _mm256_setzero_si256 (), 6835 (__mmask8) __U); 6836 } 6837 
6838 extern __inline __m128i 6839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6840 _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X, 6841 __m128i __Y) 6842 { 6843 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X, 6844 (__v2di) __Y, 6845 (__v2di) __W, 6846 (__mmask8) __U); 6847 } 6848 6849 extern __inline __m128i 6850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6851 _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) 6852 { 6853 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X, 6854 (__v2di) __Y, 6855 (__v2di) 6856 _mm_setzero_si128 (), 6857 (__mmask8) __U); 6858 } 6859 6860 extern __inline __m256i 6861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6862 _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X, 6863 __m256i __Y) 6864 { 6865 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X, 6866 (__v8si) __Y, 6867 (__v8si) __W, 6868 (__mmask8) __U); 6869 } 6870 6871 extern __inline __m256i 6872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6873 _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y) 6874 { 6875 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X, 6876 (__v8si) __Y, 6877 (__v8si) 6878 _mm256_setzero_si256 (), 6879 (__mmask8) __U); 6880 } 6881 6882 extern __inline __m128i 6883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6884 _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X, 6885 __m128i __Y) 6886 { 6887 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X, 6888 (__v4si) __Y, 6889 (__v4si) __W, 6890 (__mmask8) __U); 6891 } 6892 6893 extern __inline __m128i 6894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6895 _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y) 6896 { 6897 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X, 6898 (__v4si) __Y, 6899 (__v4si) 6900 _mm_setzero_si128 (), 6901 (__mmask8) __U); 
6902 } 6903 6904 extern __inline __m256i 6905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6906 _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X, 6907 __m256i __Y) 6908 { 6909 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X, 6910 (__v8si) __Y, 6911 (__v8si) __W, 6912 (__mmask8) __U); 6913 } 6914 6915 extern __inline __m256i 6916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6917 _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y) 6918 { 6919 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X, 6920 (__v8si) __Y, 6921 (__v8si) 6922 _mm256_setzero_si256 (), 6923 (__mmask8) __U); 6924 } 6925 6926 extern __inline __m128i 6927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6928 _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X, 6929 __m128i __Y) 6930 { 6931 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X, 6932 (__v4si) __Y, 6933 (__v4si) __W, 6934 (__mmask8) __U); 6935 } 6936 6937 extern __inline __m128i 6938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6939 _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y) 6940 { 6941 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X, 6942 (__v4si) __Y, 6943 (__v4si) 6944 _mm_setzero_si128 (), 6945 (__mmask8) __U); 6946 } 6947 6948 extern __inline __m256i 6949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6950 _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X, 6951 __m256i __Y) 6952 { 6953 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X, 6954 (__v4di) __Y, 6955 (__v4di) __W, 6956 (__mmask8) __U); 6957 } 6958 6959 extern __inline __m256i 6960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6961 _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 6962 { 6963 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X, 6964 (__v4di) __Y, 6965 (__v4di) 6966 _mm256_setzero_si256 (), 
6967 (__mmask8) __U); 6968 } 6969 6970 extern __inline __m128i 6971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6972 _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X, 6973 __m128i __Y) 6974 { 6975 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X, 6976 (__v2di) __Y, 6977 (__v2di) __W, 6978 (__mmask8) __U); 6979 } 6980 6981 extern __inline __m128i 6982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6983 _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) 6984 { 6985 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X, 6986 (__v2di) __Y, 6987 (__v2di) 6988 _mm_setzero_si128 (), 6989 (__mmask8) __U); 6990 } 6991 6992 extern __inline __m256i 6993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6994 _mm256_rolv_epi32 (__m256i __A, __m256i __B) 6995 { 6996 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 6997 (__v8si) __B, 6998 (__v8si) 6999 _mm256_setzero_si256 (), 7000 (__mmask8) -1); 7001 } 7002 7003 extern __inline __m256i 7004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7005 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 7006 __m256i __B) 7007 { 7008 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 7009 (__v8si) __B, 7010 (__v8si) __W, 7011 (__mmask8) __U); 7012 } 7013 7014 extern __inline __m256i 7015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7016 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 7017 { 7018 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 7019 (__v8si) __B, 7020 (__v8si) 7021 _mm256_setzero_si256 (), 7022 (__mmask8) __U); 7023 } 7024 7025 extern __inline __m128i 7026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7027 _mm_rolv_epi32 (__m128i __A, __m128i __B) 7028 { 7029 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 7030 (__v4si) __B, 7031 (__v4si) 7032 _mm_setzero_si128 (), 7033 (__mmask8) 
-1); 7034 } 7035 7036 extern __inline __m128i 7037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7038 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 7039 __m128i __B) 7040 { 7041 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 7042 (__v4si) __B, 7043 (__v4si) __W, 7044 (__mmask8) __U); 7045 } 7046 7047 extern __inline __m128i 7048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7049 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 7050 { 7051 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 7052 (__v4si) __B, 7053 (__v4si) 7054 _mm_setzero_si128 (), 7055 (__mmask8) __U); 7056 } 7057 7058 extern __inline __m256i 7059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7060 _mm256_rorv_epi32 (__m256i __A, __m256i __B) 7061 { 7062 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 7063 (__v8si) __B, 7064 (__v8si) 7065 _mm256_setzero_si256 (), 7066 (__mmask8) -1); 7067 } 7068 7069 extern __inline __m256i 7070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7071 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 7072 __m256i __B) 7073 { 7074 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 7075 (__v8si) __B, 7076 (__v8si) __W, 7077 (__mmask8) __U); 7078 } 7079 7080 extern __inline __m256i 7081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7082 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 7083 { 7084 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 7085 (__v8si) __B, 7086 (__v8si) 7087 _mm256_setzero_si256 (), 7088 (__mmask8) __U); 7089 } 7090 7091 extern __inline __m128i 7092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7093 _mm_rorv_epi32 (__m128i __A, __m128i __B) 7094 { 7095 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 7096 (__v4si) __B, 7097 (__v4si) 7098 _mm_setzero_si128 (), 7099 (__mmask8) -1); 7100 } 
7101 7102 extern __inline __m128i 7103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7104 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 7105 __m128i __B) 7106 { 7107 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 7108 (__v4si) __B, 7109 (__v4si) __W, 7110 (__mmask8) __U); 7111 } 7112 7113 extern __inline __m128i 7114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7115 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 7116 { 7117 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 7118 (__v4si) __B, 7119 (__v4si) 7120 _mm_setzero_si128 (), 7121 (__mmask8) __U); 7122 } 7123 7124 extern __inline __m256i 7125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7126 _mm256_rolv_epi64 (__m256i __A, __m256i __B) 7127 { 7128 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 7129 (__v4di) __B, 7130 (__v4di) 7131 _mm256_setzero_si256 (), 7132 (__mmask8) -1); 7133 } 7134 7135 extern __inline __m256i 7136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7137 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 7138 __m256i __B) 7139 { 7140 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 7141 (__v4di) __B, 7142 (__v4di) __W, 7143 (__mmask8) __U); 7144 } 7145 7146 extern __inline __m256i 7147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7148 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 7149 { 7150 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 7151 (__v4di) __B, 7152 (__v4di) 7153 _mm256_setzero_si256 (), 7154 (__mmask8) __U); 7155 } 7156 7157 extern __inline __m128i 7158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7159 _mm_rolv_epi64 (__m128i __A, __m128i __B) 7160 { 7161 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 7162 (__v2di) __B, 7163 (__v2di) 7164 _mm_setzero_si128 (), 7165 (__mmask8) -1); 7166 } 7167 7168 
extern __inline __m128i 7169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7170 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 7171 __m128i __B) 7172 { 7173 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 7174 (__v2di) __B, 7175 (__v2di) __W, 7176 (__mmask8) __U); 7177 } 7178 7179 extern __inline __m128i 7180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7181 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 7182 { 7183 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 7184 (__v2di) __B, 7185 (__v2di) 7186 _mm_setzero_si128 (), 7187 (__mmask8) __U); 7188 } 7189 7190 extern __inline __m256i 7191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7192 _mm256_rorv_epi64 (__m256i __A, __m256i __B) 7193 { 7194 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 7195 (__v4di) __B, 7196 (__v4di) 7197 _mm256_setzero_si256 (), 7198 (__mmask8) -1); 7199 } 7200 7201 extern __inline __m256i 7202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7203 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 7204 __m256i __B) 7205 { 7206 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 7207 (__v4di) __B, 7208 (__v4di) __W, 7209 (__mmask8) __U); 7210 } 7211 7212 extern __inline __m256i 7213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7214 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 7215 { 7216 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 7217 (__v4di) __B, 7218 (__v4di) 7219 _mm256_setzero_si256 (), 7220 (__mmask8) __U); 7221 } 7222 7223 extern __inline __m128i 7224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7225 _mm_rorv_epi64 (__m128i __A, __m128i __B) 7226 { 7227 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 7228 (__v2di) __B, 7229 (__v2di) 7230 _mm_setzero_si128 (), 7231 (__mmask8) -1); 7232 } 7233 7234 extern 
__inline __m128i 7235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7236 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 7237 __m128i __B) 7238 { 7239 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 7240 (__v2di) __B, 7241 (__v2di) __W, 7242 (__mmask8) __U); 7243 } 7244 7245 extern __inline __m128i 7246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7247 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 7248 { 7249 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 7250 (__v2di) __B, 7251 (__v2di) 7252 _mm_setzero_si128 (), 7253 (__mmask8) __U); 7254 } 7255 7256 extern __inline __m256i 7257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7258 _mm256_srav_epi64 (__m256i __X, __m256i __Y) 7259 { 7260 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X, 7261 (__v4di) __Y, 7262 (__v4di) 7263 _mm256_setzero_si256 (), 7264 (__mmask8) -1); 7265 } 7266 7267 extern __inline __m256i 7268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7269 _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X, 7270 __m256i __Y) 7271 { 7272 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X, 7273 (__v4di) __Y, 7274 (__v4di) __W, 7275 (__mmask8) __U); 7276 } 7277 7278 extern __inline __m256i 7279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7280 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 7281 { 7282 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X, 7283 (__v4di) __Y, 7284 (__v4di) 7285 _mm256_setzero_si256 (), 7286 (__mmask8) __U); 7287 } 7288 7289 extern __inline __m256i 7290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7291 _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 7292 __m256i __B) 7293 { 7294 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A, 7295 (__v4di) __B, 7296 (__v4di) __W, __U); 7297 } 7298 7299 extern __inline 
__m256i 7300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7301 _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 7302 { 7303 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A, 7304 (__v4di) __B, 7305 (__v4di) 7306 _mm256_setzero_pd (), 7307 __U); 7308 } 7309 7310 extern __inline __m128i 7311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7312 _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 7313 __m128i __B) 7314 { 7315 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A, 7316 (__v2di) __B, 7317 (__v2di) __W, __U); 7318 } 7319 7320 extern __inline __m128i 7321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7322 _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 7323 { 7324 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A, 7325 (__v2di) __B, 7326 (__v2di) 7327 _mm_setzero_pd (), 7328 __U); 7329 } 7330 7331 extern __inline __m256i 7332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7333 _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 7334 __m256i __B) 7335 { 7336 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A, 7337 (__v4di) __B, 7338 (__v4di) __W, __U); 7339 } 7340 7341 extern __inline __m256i 7342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7343 _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 7344 { 7345 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A, 7346 (__v4di) __B, 7347 (__v4di) 7348 _mm256_setzero_pd (), 7349 __U); 7350 } 7351 7352 extern __inline __m128i 7353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7354 _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 7355 __m128i __B) 7356 { 7357 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A, 7358 (__v2di) __B, 7359 (__v2di) __W, __U); 7360 } 7361 7362 extern __inline __m128i 7363 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 7364 _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 7365 { 7366 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A, 7367 (__v2di) __B, 7368 (__v2di) 7369 _mm_setzero_pd (), 7370 __U); 7371 } 7372 7373 extern __inline __m256i 7374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7375 _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 7376 __m256i __B) 7377 { 7378 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A, 7379 (__v4di) __B, 7380 (__v4di) __W, 7381 (__mmask8) __U); 7382 } 7383 7384 extern __inline __m256i 7385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7386 _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 7387 { 7388 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A, 7389 (__v4di) __B, 7390 (__v4di) 7391 _mm256_setzero_si256 (), 7392 (__mmask8) __U); 7393 } 7394 7395 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 7396 _mm256_or_epi64 (__m256i __A, __m256i __B) 7397 { 7398 return (__m256i) ((__v4du)__A | (__v4du)__B); 7399 } 7400 7401 extern __inline __m128i 7402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7403 _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 7404 { 7405 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A, 7406 (__v2di) __B, 7407 (__v2di) __W, 7408 (__mmask8) __U); 7409 } 7410 7411 extern __inline __m128i 7412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7413 _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 7414 { 7415 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A, 7416 (__v2di) __B, 7417 (__v2di) 7418 _mm_setzero_si128 (), 7419 (__mmask8) __U); 7420 } 7421 7422 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 7423 _mm_or_epi64 (__m128i __A, __m128i __B) 7424 { 7425 return (__m128i) ((__v2du)__A | (__v2du)__B); 7426 } 7427 7428 
extern __inline __m256i 7429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7430 _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 7431 __m256i __B) 7432 { 7433 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A, 7434 (__v4di) __B, 7435 (__v4di) __W, 7436 (__mmask8) __U); 7437 } 7438 7439 extern __inline __m256i 7440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7441 _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 7442 { 7443 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A, 7444 (__v4di) __B, 7445 (__v4di) 7446 _mm256_setzero_si256 (), 7447 (__mmask8) __U); 7448 } 7449 7450 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 7451 _mm256_xor_epi64 (__m256i __A, __m256i __B) 7452 { 7453 return (__m256i) ((__v4du)__A ^ (__v4du)__B); 7454 } 7455 7456 extern __inline __m128i 7457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7458 _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 7459 __m128i __B) 7460 { 7461 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A, 7462 (__v2di) __B, 7463 (__v2di) __W, 7464 (__mmask8) __U); 7465 } 7466 7467 extern __inline __m128i 7468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7469 _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 7470 { 7471 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A, 7472 (__v2di) __B, 7473 (__v2di) 7474 _mm_setzero_si128 (), 7475 (__mmask8) __U); 7476 } 7477 7478 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 7479 _mm_xor_epi64 (__m128i __A, __m128i __B) 7480 { 7481 return (__m128i) ((__v2du)__A ^ (__v2du)__B); 7482 } 7483 7484 extern __inline __m256d 7485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7486 _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A, 7487 __m256d __B) 7488 { 7489 return (__m256d) 
__builtin_ia32_maxpd256_mask ((__v4df) __A, 7490 (__v4df) __B, 7491 (__v4df) __W, 7492 (__mmask8) __U); 7493 } 7494 7495 extern __inline __m256d 7496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7497 _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B) 7498 { 7499 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A, 7500 (__v4df) __B, 7501 (__v4df) 7502 _mm256_setzero_pd (), 7503 (__mmask8) __U); 7504 } 7505 7506 extern __inline __m256 7507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7508 _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 7509 { 7510 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A, 7511 (__v8sf) __B, 7512 (__v8sf) __W, 7513 (__mmask8) __U); 7514 } 7515 7516 extern __inline __m256 7517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7518 _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B) 7519 { 7520 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A, 7521 (__v8sf) __B, 7522 (__v8sf) 7523 _mm256_setzero_ps (), 7524 (__mmask8) __U); 7525 } 7526 7527 extern __inline __m128 7528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7529 _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7530 { 7531 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A, 7532 (__v4sf) __B, 7533 (__v4sf) __W, 7534 (__mmask8) __U); 7535 } 7536 7537 extern __inline __m128 7538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7539 _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B) 7540 { 7541 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A, 7542 (__v4sf) __B, 7543 (__v4sf) 7544 _mm_setzero_ps (), 7545 (__mmask8) __U); 7546 } 7547 7548 extern __inline __m128d 7549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7550 _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 7551 { 7552 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A, 7553 (__v2df) 
__B, 7554 (__v2df) __W, 7555 (__mmask8) __U); 7556 } 7557 7558 extern __inline __m128d 7559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7560 _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B) 7561 { 7562 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A, 7563 (__v2df) __B, 7564 (__v2df) 7565 _mm_setzero_pd (), 7566 (__mmask8) __U); 7567 } 7568 7569 extern __inline __m256d 7570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7571 _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A, 7572 __m256d __B) 7573 { 7574 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A, 7575 (__v4df) __B, 7576 (__v4df) __W, 7577 (__mmask8) __U); 7578 } 7579 7580 extern __inline __m256d 7581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7582 _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A, 7583 __m256d __B) 7584 { 7585 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A, 7586 (__v4df) __B, 7587 (__v4df) __W, 7588 (__mmask8) __U); 7589 } 7590 7591 extern __inline __m256d 7592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7593 _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B) 7594 { 7595 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A, 7596 (__v4df) __B, 7597 (__v4df) 7598 _mm256_setzero_pd (), 7599 (__mmask8) __U); 7600 } 7601 7602 extern __inline __m256 7603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7604 _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 7605 { 7606 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A, 7607 (__v8sf) __B, 7608 (__v8sf) __W, 7609 (__mmask8) __U); 7610 } 7611 7612 extern __inline __m256d 7613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7614 _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B) 7615 { 7616 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A, 7617 (__v4df) __B, 7618 (__v4df) 7619 _mm256_setzero_pd (), 
7620 (__mmask8) __U); 7621 } 7622 7623 extern __inline __m256 7624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7625 _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 7626 { 7627 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A, 7628 (__v8sf) __B, 7629 (__v8sf) __W, 7630 (__mmask8) __U); 7631 } 7632 7633 extern __inline __m256 7634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7635 _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B) 7636 { 7637 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A, 7638 (__v8sf) __B, 7639 (__v8sf) 7640 _mm256_setzero_ps (), 7641 (__mmask8) __U); 7642 } 7643 7644 extern __inline __m256 7645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7646 _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B) 7647 { 7648 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A, 7649 (__v8sf) __B, 7650 (__v8sf) 7651 _mm256_setzero_ps (), 7652 (__mmask8) __U); 7653 } 7654 7655 extern __inline __m128 7656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7657 _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7658 { 7659 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A, 7660 (__v4sf) __B, 7661 (__v4sf) __W, 7662 (__mmask8) __U); 7663 } 7664 7665 extern __inline __m128 7666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7667 _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7668 { 7669 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A, 7670 (__v4sf) __B, 7671 (__v4sf) __W, 7672 (__mmask8) __U); 7673 } 7674 7675 extern __inline __m128 7676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7677 _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B) 7678 { 7679 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A, 7680 (__v4sf) __B, 7681 (__v4sf) 7682 _mm_setzero_ps (), 7683 (__mmask8) __U); 7684 } 7685 7686 extern __inline __m128 7687 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7688 _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B) 7689 { 7690 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A, 7691 (__v4sf) __B, 7692 (__v4sf) 7693 _mm_setzero_ps (), 7694 (__mmask8) __U); 7695 } 7696 7697 extern __inline __m128 7698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7699 _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7700 { 7701 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A, 7702 (__v4sf) __B, 7703 (__v4sf) __W, 7704 (__mmask8) __U); 7705 } 7706 7707 extern __inline __m128 7708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7709 _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B) 7710 { 7711 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A, 7712 (__v4sf) __B, 7713 (__v4sf) 7714 _mm_setzero_ps (), 7715 (__mmask8) __U); 7716 } 7717 7718 extern __inline __m128d 7719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7720 _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 7721 { 7722 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A, 7723 (__v2df) __B, 7724 (__v2df) __W, 7725 (__mmask8) __U); 7726 } 7727 7728 extern __inline __m128d 7729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7730 _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B) 7731 { 7732 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A, 7733 (__v2df) __B, 7734 (__v2df) 7735 _mm_setzero_pd (), 7736 (__mmask8) __U); 7737 } 7738 7739 extern __inline __m128d 7740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7741 _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 7742 { 7743 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A, 7744 (__v2df) __B, 7745 (__v2df) __W, 7746 (__mmask8) __U); 7747 } 7748 7749 extern __inline __m128d 7750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7751 
_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B) 7752 { 7753 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A, 7754 (__v2df) __B, 7755 (__v2df) 7756 _mm_setzero_pd (), 7757 (__mmask8) __U); 7758 } 7759 7760 extern __inline __m128d 7761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7762 _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 7763 { 7764 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A, 7765 (__v2df) __B, 7766 (__v2df) __W, 7767 (__mmask8) __U); 7768 } 7769 7770 extern __inline __m128d 7771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7772 _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B) 7773 { 7774 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A, 7775 (__v2df) __B, 7776 (__v2df) 7777 _mm_setzero_pd (), 7778 (__mmask8) __U); 7779 } 7780 7781 extern __inline __m256 7782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7783 _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 7784 { 7785 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A, 7786 (__v8sf) __B, 7787 (__v8sf) __W, 7788 (__mmask8) __U); 7789 } 7790 7791 extern __inline __m256 7792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7793 _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B) 7794 { 7795 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A, 7796 (__v8sf) __B, 7797 (__v8sf) 7798 _mm256_setzero_ps (), 7799 (__mmask8) __U); 7800 } 7801 7802 extern __inline __m256d 7803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7804 _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A, 7805 __m256d __B) 7806 { 7807 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A, 7808 (__v4df) __B, 7809 (__v4df) __W, 7810 (__mmask8) __U); 7811 } 7812 7813 extern __inline __m256d 7814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7815 _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, 
__m256d __B) 7816 { 7817 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A, 7818 (__v4df) __B, 7819 (__v4df) 7820 _mm256_setzero_pd (), 7821 (__mmask8) __U); 7822 } 7823 7824 extern __inline __m256i 7825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7826 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) 7827 { 7828 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 7829 (__v4di) __B, 7830 (__v4di) 7831 _mm256_setzero_si256 (), 7832 __M); 7833 } 7834 7835 extern __inline __m256i 7836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7837 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, 7838 __m256i __B) 7839 { 7840 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 7841 (__v4di) __B, 7842 (__v4di) __W, __M); 7843 } 7844 7845 extern __inline __m256i 7846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7847 _mm256_min_epi64 (__m256i __A, __m256i __B) 7848 { 7849 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 7850 (__v4di) __B, 7851 (__v4di) 7852 _mm256_setzero_si256 (), 7853 (__mmask8) -1); 7854 } 7855 7856 extern __inline __m256i 7857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7858 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, 7859 __m256i __B) 7860 { 7861 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 7862 (__v4di) __B, 7863 (__v4di) __W, __M); 7864 } 7865 7866 extern __inline __m256i 7867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7868 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) 7869 { 7870 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 7871 (__v4di) __B, 7872 (__v4di) 7873 _mm256_setzero_si256 (), 7874 __M); 7875 } 7876 7877 extern __inline __m256i 7878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7879 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) 7880 { 7881 return (__m256i) 
__builtin_ia32_pmaxuq256_mask ((__v4di) __A, 7882 (__v4di) __B, 7883 (__v4di) 7884 _mm256_setzero_si256 (), 7885 __M); 7886 } 7887 7888 extern __inline __m256i 7889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7890 _mm256_max_epi64 (__m256i __A, __m256i __B) 7891 { 7892 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 7893 (__v4di) __B, 7894 (__v4di) 7895 _mm256_setzero_si256 (), 7896 (__mmask8) -1); 7897 } 7898 7899 extern __inline __m256i 7900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7901 _mm256_max_epu64 (__m256i __A, __m256i __B) 7902 { 7903 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 7904 (__v4di) __B, 7905 (__v4di) 7906 _mm256_setzero_si256 (), 7907 (__mmask8) -1); 7908 } 7909 7910 extern __inline __m256i 7911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7912 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 7913 __m256i __B) 7914 { 7915 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 7916 (__v4di) __B, 7917 (__v4di) __W, __M); 7918 } 7919 7920 extern __inline __m256i 7921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7922 _mm256_min_epu64 (__m256i __A, __m256i __B) 7923 { 7924 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 7925 (__v4di) __B, 7926 (__v4di) 7927 _mm256_setzero_si256 (), 7928 (__mmask8) -1); 7929 } 7930 7931 extern __inline __m256i 7932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7933 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 7934 __m256i __B) 7935 { 7936 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 7937 (__v4di) __B, 7938 (__v4di) __W, __M); 7939 } 7940 7941 extern __inline __m256i 7942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7943 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) 7944 { 7945 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 7946 (__v4di) __B, 7947 
(__v4di) 7948 _mm256_setzero_si256 (), 7949 __M); 7950 } 7951 7952 extern __inline __m256i 7953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7954 _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B) 7955 { 7956 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A, 7957 (__v8si) __B, 7958 (__v8si) 7959 _mm256_setzero_si256 (), 7960 __M); 7961 } 7962 7963 extern __inline __m256i 7964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7965 _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A, 7966 __m256i __B) 7967 { 7968 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A, 7969 (__v8si) __B, 7970 (__v8si) __W, __M); 7971 } 7972 7973 extern __inline __m256i 7974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7975 _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B) 7976 { 7977 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A, 7978 (__v8si) __B, 7979 (__v8si) 7980 _mm256_setzero_si256 (), 7981 __M); 7982 } 7983 7984 extern __inline __m256i 7985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7986 _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A, 7987 __m256i __B) 7988 { 7989 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A, 7990 (__v8si) __B, 7991 (__v8si) __W, __M); 7992 } 7993 7994 extern __inline __m256i 7995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7996 _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B) 7997 { 7998 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A, 7999 (__v8si) __B, 8000 (__v8si) 8001 _mm256_setzero_si256 (), 8002 __M); 8003 } 8004 8005 extern __inline __m256i 8006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8007 _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A, 8008 __m256i __B) 8009 { 8010 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A, 8011 (__v8si) __B, 8012 (__v8si) __W, __M); 
8013 } 8014 8015 extern __inline __m256i 8016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8017 _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B) 8018 { 8019 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A, 8020 (__v8si) __B, 8021 (__v8si) 8022 _mm256_setzero_si256 (), 8023 __M); 8024 } 8025 8026 extern __inline __m256i 8027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8028 _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A, 8029 __m256i __B) 8030 { 8031 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A, 8032 (__v8si) __B, 8033 (__v8si) __W, __M); 8034 } 8035 8036 extern __inline __m128i 8037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8038 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) 8039 { 8040 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 8041 (__v2di) __B, 8042 (__v2di) 8043 _mm_setzero_si128 (), 8044 __M); 8045 } 8046 8047 extern __inline __m128i 8048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8049 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 8050 __m128i __B) 8051 { 8052 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 8053 (__v2di) __B, 8054 (__v2di) __W, __M); 8055 } 8056 8057 extern __inline __m128i 8058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8059 _mm_min_epi64 (__m128i __A, __m128i __B) 8060 { 8061 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 8062 (__v2di) __B, 8063 (__v2di) 8064 _mm_setzero_si128 (), 8065 (__mmask8) -1); 8066 } 8067 8068 extern __inline __m128i 8069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8070 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 8071 __m128i __B) 8072 { 8073 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 8074 (__v2di) __B, 8075 (__v2di) __W, __M); 8076 } 8077 8078 extern __inline __m128i 8079 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 8080 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) 8081 { 8082 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 8083 (__v2di) __B, 8084 (__v2di) 8085 _mm_setzero_si128 (), 8086 __M); 8087 } 8088 8089 extern __inline __m128i 8090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8091 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) 8092 { 8093 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 8094 (__v2di) __B, 8095 (__v2di) 8096 _mm_setzero_si128 (), 8097 __M); 8098 } 8099 8100 extern __inline __m128i 8101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8102 _mm_max_epi64 (__m128i __A, __m128i __B) 8103 { 8104 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 8105 (__v2di) __B, 8106 (__v2di) 8107 _mm_setzero_si128 (), 8108 (__mmask8) -1); 8109 } 8110 8111 extern __inline __m128i 8112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8113 _mm_max_epu64 (__m128i __A, __m128i __B) 8114 { 8115 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 8116 (__v2di) __B, 8117 (__v2di) 8118 _mm_setzero_si128 (), 8119 (__mmask8) -1); 8120 } 8121 8122 extern __inline __m128i 8123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8124 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, 8125 __m128i __B) 8126 { 8127 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 8128 (__v2di) __B, 8129 (__v2di) __W, __M); 8130 } 8131 8132 extern __inline __m128i 8133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8134 _mm_min_epu64 (__m128i __A, __m128i __B) 8135 { 8136 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 8137 (__v2di) __B, 8138 (__v2di) 8139 _mm_setzero_si128 (), 8140 (__mmask8) -1); 8141 } 8142 8143 extern __inline __m128i 8144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8145 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, 
__m128i __A, 8146 __m128i __B) 8147 { 8148 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 8149 (__v2di) __B, 8150 (__v2di) __W, __M); 8151 } 8152 8153 extern __inline __m128i 8154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8155 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) 8156 { 8157 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 8158 (__v2di) __B, 8159 (__v2di) 8160 _mm_setzero_si128 (), 8161 __M); 8162 } 8163 8164 extern __inline __m128i 8165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8166 _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B) 8167 { 8168 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A, 8169 (__v4si) __B, 8170 (__v4si) 8171 _mm_setzero_si128 (), 8172 __M); 8173 } 8174 8175 extern __inline __m128i 8176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8177 _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A, 8178 __m128i __B) 8179 { 8180 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A, 8181 (__v4si) __B, 8182 (__v4si) __W, __M); 8183 } 8184 8185 extern __inline __m128i 8186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8187 _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B) 8188 { 8189 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A, 8190 (__v4si) __B, 8191 (__v4si) 8192 _mm_setzero_si128 (), 8193 __M); 8194 } 8195 8196 extern __inline __m128i 8197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8198 _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A, 8199 __m128i __B) 8200 { 8201 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A, 8202 (__v4si) __B, 8203 (__v4si) __W, __M); 8204 } 8205 8206 extern __inline __m128i 8207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8208 _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B) 8209 { 8210 return (__m128i) __builtin_ia32_pmaxud128_mask 
((__v4si) __A, 8211 (__v4si) __B, 8212 (__v4si) 8213 _mm_setzero_si128 (), 8214 __M); 8215 } 8216 8217 extern __inline __m128i 8218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8219 _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A, 8220 __m128i __B) 8221 { 8222 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A, 8223 (__v4si) __B, 8224 (__v4si) __W, __M); 8225 } 8226 8227 extern __inline __m128i 8228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8229 _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B) 8230 { 8231 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A, 8232 (__v4si) __B, 8233 (__v4si) 8234 _mm_setzero_si128 (), 8235 __M); 8236 } 8237 8238 extern __inline __m128i 8239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8240 _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A, 8241 __m128i __B) 8242 { 8243 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A, 8244 (__v4si) __B, 8245 (__v4si) __W, __M); 8246 } 8247 8248 #ifndef __AVX512CD__ 8249 #pragma GCC push_options 8250 #pragma GCC target("avx512vl,avx512cd") 8251 #define __DISABLE_AVX512VLCD__ 8252 #endif 8253 8254 extern __inline __m128i 8255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8256 _mm_broadcastmb_epi64 (__mmask8 __A) 8257 { 8258 return (__m128i) __builtin_ia32_broadcastmb128 (__A); 8259 } 8260 8261 extern __inline __m256i 8262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8263 _mm256_broadcastmb_epi64 (__mmask8 __A) 8264 { 8265 return (__m256i) __builtin_ia32_broadcastmb256 (__A); 8266 } 8267 8268 extern __inline __m128i 8269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8270 _mm_broadcastmw_epi32 (__mmask16 __A) 8271 { 8272 return (__m128i) __builtin_ia32_broadcastmw128 (__A); 8273 } 8274 8275 extern __inline __m256i 8276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8277 
_mm256_broadcastmw_epi32 (__mmask16 __A) 8278 { 8279 return (__m256i) __builtin_ia32_broadcastmw256 (__A); 8280 } 8281 8282 extern __inline __m256i 8283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8284 _mm256_lzcnt_epi32 (__m256i __A) 8285 { 8286 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A, 8287 (__v8si) 8288 _mm256_setzero_si256 (), 8289 (__mmask8) -1); 8290 } 8291 8292 extern __inline __m256i 8293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8294 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 8295 { 8296 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A, 8297 (__v8si) __W, 8298 (__mmask8) __U); 8299 } 8300 8301 extern __inline __m256i 8302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8303 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) 8304 { 8305 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A, 8306 (__v8si) 8307 _mm256_setzero_si256 (), 8308 (__mmask8) __U); 8309 } 8310 8311 extern __inline __m256i 8312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8313 _mm256_lzcnt_epi64 (__m256i __A) 8314 { 8315 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A, 8316 (__v4di) 8317 _mm256_setzero_si256 (), 8318 (__mmask8) -1); 8319 } 8320 8321 extern __inline __m256i 8322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8323 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 8324 { 8325 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A, 8326 (__v4di) __W, 8327 (__mmask8) __U); 8328 } 8329 8330 extern __inline __m256i 8331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8332 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) 8333 { 8334 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A, 8335 (__v4di) 8336 _mm256_setzero_si256 (), 8337 (__mmask8) __U); 8338 } 8339 8340 extern __inline __m256i 8341 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 8342 _mm256_conflict_epi64 (__m256i __A) 8343 { 8344 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, 8345 (__v4di) 8346 _mm256_setzero_si256 (), 8347 (__mmask8) -1); 8348 } 8349 8350 extern __inline __m256i 8351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8352 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 8353 { 8354 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, 8355 (__v4di) __W, 8356 (__mmask8) 8357 __U); 8358 } 8359 8360 extern __inline __m256i 8361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8362 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) 8363 { 8364 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, 8365 (__v4di) 8366 _mm256_setzero_si256 (), 8367 (__mmask8) 8368 __U); 8369 } 8370 8371 extern __inline __m256i 8372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8373 _mm256_conflict_epi32 (__m256i __A) 8374 { 8375 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, 8376 (__v8si) 8377 _mm256_setzero_si256 (), 8378 (__mmask8) -1); 8379 } 8380 8381 extern __inline __m256i 8382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8383 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 8384 { 8385 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, 8386 (__v8si) __W, 8387 (__mmask8) 8388 __U); 8389 } 8390 8391 extern __inline __m256i 8392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8393 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) 8394 { 8395 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, 8396 (__v8si) 8397 _mm256_setzero_si256 (), 8398 (__mmask8) 8399 __U); 8400 } 8401 8402 extern __inline __m128i 8403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8404 _mm_lzcnt_epi32 (__m128i __A) 8405 { 8406 return 
(__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A, 8407 (__v4si) 8408 _mm_setzero_si128 (), 8409 (__mmask8) -1); 8410 } 8411 8412 extern __inline __m128i 8413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8414 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 8415 { 8416 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A, 8417 (__v4si) __W, 8418 (__mmask8) __U); 8419 } 8420 8421 extern __inline __m128i 8422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8423 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) 8424 { 8425 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A, 8426 (__v4si) 8427 _mm_setzero_si128 (), 8428 (__mmask8) __U); 8429 } 8430 8431 extern __inline __m128i 8432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8433 _mm_lzcnt_epi64 (__m128i __A) 8434 { 8435 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, 8436 (__v2di) 8437 _mm_setzero_si128 (), 8438 (__mmask8) -1); 8439 } 8440 8441 extern __inline __m128i 8442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8443 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 8444 { 8445 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, 8446 (__v2di) __W, 8447 (__mmask8) __U); 8448 } 8449 8450 extern __inline __m128i 8451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8452 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) 8453 { 8454 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A, 8455 (__v2di) 8456 _mm_setzero_si128 (), 8457 (__mmask8) __U); 8458 } 8459 8460 extern __inline __m128i 8461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8462 _mm_conflict_epi64 (__m128i __A) 8463 { 8464 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, 8465 (__v2di) 8466 _mm_setzero_si128 (), 8467 (__mmask8) -1); 8468 } 8469 8470 extern __inline __m128i 8471 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 8472 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 8473 { 8474 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, 8475 (__v2di) __W, 8476 (__mmask8) 8477 __U); 8478 } 8479 8480 extern __inline __m128i 8481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8482 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) 8483 { 8484 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, 8485 (__v2di) 8486 _mm_setzero_si128 (), 8487 (__mmask8) 8488 __U); 8489 } 8490 8491 extern __inline __m128i 8492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8493 _mm_conflict_epi32 (__m128i __A) 8494 { 8495 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, 8496 (__v4si) 8497 _mm_setzero_si128 (), 8498 (__mmask8) -1); 8499 } 8500 8501 extern __inline __m128i 8502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8503 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 8504 { 8505 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, 8506 (__v4si) __W, 8507 (__mmask8) 8508 __U); 8509 } 8510 8511 extern __inline __m128i 8512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8513 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) 8514 { 8515 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, 8516 (__v4si) 8517 _mm_setzero_si128 (), 8518 (__mmask8) 8519 __U); 8520 } 8521 8522 #ifdef __DISABLE_AVX512VLCD__ 8523 #pragma GCC pop_options 8524 #endif 8525 8526 extern __inline __m256d 8527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8528 _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A, 8529 __m256d __B) 8530 { 8531 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A, 8532 (__v4df) __B, 8533 (__v4df) __W, 8534 (__mmask8) __U); 8535 } 8536 8537 extern __inline __m256d 8538 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 8539 _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B) 8540 { 8541 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A, 8542 (__v4df) __B, 8543 (__v4df) 8544 _mm256_setzero_pd (), 8545 (__mmask8) __U); 8546 } 8547 8548 extern __inline __m128d 8549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8550 _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A, 8551 __m128d __B) 8552 { 8553 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A, 8554 (__v2df) __B, 8555 (__v2df) __W, 8556 (__mmask8) __U); 8557 } 8558 8559 extern __inline __m128d 8560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8561 _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B) 8562 { 8563 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A, 8564 (__v2df) __B, 8565 (__v2df) 8566 _mm_setzero_pd (), 8567 (__mmask8) __U); 8568 } 8569 8570 extern __inline __m256 8571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8572 _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A, 8573 __m256 __B) 8574 { 8575 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A, 8576 (__v8sf) __B, 8577 (__v8sf) __W, 8578 (__mmask8) __U); 8579 } 8580 8581 extern __inline __m256d 8582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8583 _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A, 8584 __m256d __B) 8585 { 8586 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A, 8587 (__v4df) __B, 8588 (__v4df) __W, 8589 (__mmask8) __U); 8590 } 8591 8592 extern __inline __m256d 8593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8594 _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B) 8595 { 8596 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A, 8597 (__v4df) __B, 8598 (__v4df) 8599 _mm256_setzero_pd (), 8600 (__mmask8) __U); 8601 } 8602 8603 extern __inline __m128d 8604 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 8605 _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A, 8606 __m128d __B) 8607 { 8608 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A, 8609 (__v2df) __B, 8610 (__v2df) __W, 8611 (__mmask8) __U); 8612 } 8613 8614 extern __inline __m128d 8615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8616 _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B) 8617 { 8618 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A, 8619 (__v2df) __B, 8620 (__v2df) 8621 _mm_setzero_pd (), 8622 (__mmask8) __U); 8623 } 8624 8625 extern __inline __m256 8626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8627 _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A, 8628 __m256 __B) 8629 { 8630 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A, 8631 (__v8sf) __B, 8632 (__v8sf) __W, 8633 (__mmask8) __U); 8634 } 8635 8636 extern __inline __m256 8637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8638 _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B) 8639 { 8640 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A, 8641 (__v8sf) __B, 8642 (__v8sf) 8643 _mm256_setzero_ps (), 8644 (__mmask8) __U); 8645 } 8646 8647 extern __inline __m128 8648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8649 _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8650 { 8651 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A, 8652 (__v4sf) __B, 8653 (__v4sf) __W, 8654 (__mmask8) __U); 8655 } 8656 8657 extern __inline __m128 8658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8659 _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B) 8660 { 8661 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A, 8662 (__v4sf) __B, 8663 (__v4sf) 8664 _mm_setzero_ps (), 8665 (__mmask8) __U); 8666 } 8667 8668 extern __inline __m128 8669 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8670 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 8671 { 8672 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8673 (__v4sf) __W, 8674 (__mmask8) __U); 8675 } 8676 8677 extern __inline __m128 8678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8679 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8680 { 8681 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8682 (__v4sf) 8683 _mm_setzero_ps (), 8684 (__mmask8) __U); 8685 } 8686 8687 extern __inline __m256 8688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8689 _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B) 8690 { 8691 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A, 8692 (__v8sf) __B, 8693 (__v8sf) 8694 _mm256_setzero_ps (), 8695 (__mmask8) __U); 8696 } 8697 8698 extern __inline __m256 8699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8700 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 8701 { 8702 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8703 (__v8sf) __W, 8704 (__mmask8) __U); 8705 } 8706 8707 extern __inline __m256 8708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8709 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8710 { 8711 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8712 (__v8sf) 8713 _mm256_setzero_ps (), 8714 (__mmask8) __U); 8715 } 8716 8717 extern __inline __m128 8718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8719 _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8720 { 8721 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A, 8722 (__v4sf) __B, 8723 (__v4sf) __W, 8724 (__mmask8) __U); 8725 } 8726 8727 extern __inline __m128 8728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8729 _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B) 8730 { 
8731 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A, 8732 (__v4sf) __B, 8733 (__v4sf) 8734 _mm_setzero_ps (), 8735 (__mmask8) __U); 8736 } 8737 8738 extern __inline __m256i 8739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8740 _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 8741 __m128i __B) 8742 { 8743 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A, 8744 (__v4si) __B, 8745 (__v8si) __W, 8746 (__mmask8) __U); 8747 } 8748 8749 extern __inline __m256i 8750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8751 _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B) 8752 { 8753 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A, 8754 (__v4si) __B, 8755 (__v8si) 8756 _mm256_setzero_si256 (), 8757 (__mmask8) __U); 8758 } 8759 8760 extern __inline __m128i 8761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8762 _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 8763 __m128i __B) 8764 { 8765 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A, 8766 (__v4si) __B, 8767 (__v4si) __W, 8768 (__mmask8) __U); 8769 } 8770 8771 extern __inline __m128i 8772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8773 _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 8774 { 8775 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A, 8776 (__v4si) __B, 8777 (__v4si) 8778 _mm_setzero_si128 (), 8779 (__mmask8) __U); 8780 } 8781 8782 extern __inline __m256i 8783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8784 _mm256_sra_epi64 (__m256i __A, __m128i __B) 8785 { 8786 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, 8787 (__v2di) __B, 8788 (__v4di) 8789 _mm256_setzero_si256 (), 8790 (__mmask8) -1); 8791 } 8792 8793 extern __inline __m256i 8794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8795 _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 8796 
__m128i __B) 8797 { 8798 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, 8799 (__v2di) __B, 8800 (__v4di) __W, 8801 (__mmask8) __U); 8802 } 8803 8804 extern __inline __m256i 8805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8806 _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B) 8807 { 8808 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, 8809 (__v2di) __B, 8810 (__v4di) 8811 _mm256_setzero_si256 (), 8812 (__mmask8) __U); 8813 } 8814 8815 extern __inline __m128i 8816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8817 _mm_sra_epi64 (__m128i __A, __m128i __B) 8818 { 8819 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, 8820 (__v2di) __B, 8821 (__v2di) 8822 _mm_setzero_si128 (), 8823 (__mmask8) -1); 8824 } 8825 8826 extern __inline __m128i 8827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8828 _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 8829 __m128i __B) 8830 { 8831 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, 8832 (__v2di) __B, 8833 (__v2di) __W, 8834 (__mmask8) __U); 8835 } 8836 8837 extern __inline __m128i 8838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8839 _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 8840 { 8841 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, 8842 (__v2di) __B, 8843 (__v2di) 8844 _mm_setzero_si128 (), 8845 (__mmask8) __U); 8846 } 8847 8848 extern __inline __m128i 8849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8850 _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 8851 __m128i __B) 8852 { 8853 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A, 8854 (__v4si) __B, 8855 (__v4si) __W, 8856 (__mmask8) __U); 8857 } 8858 8859 extern __inline __m128i 8860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8861 _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 8862 { 8863 return 
(__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A, 8864 (__v4si) __B, 8865 (__v4si) 8866 _mm_setzero_si128 (), 8867 (__mmask8) __U); 8868 } 8869 8870 extern __inline __m128i 8871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8872 _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 8873 __m128i __B) 8874 { 8875 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A, 8876 (__v2di) __B, 8877 (__v2di) __W, 8878 (__mmask8) __U); 8879 } 8880 8881 extern __inline __m128i 8882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8883 _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 8884 { 8885 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A, 8886 (__v2di) __B, 8887 (__v2di) 8888 _mm_setzero_si128 (), 8889 (__mmask8) __U); 8890 } 8891 8892 extern __inline __m256i 8893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8894 _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 8895 __m128i __B) 8896 { 8897 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A, 8898 (__v4si) __B, 8899 (__v8si) __W, 8900 (__mmask8) __U); 8901 } 8902 8903 extern __inline __m256i 8904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8905 _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B) 8906 { 8907 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A, 8908 (__v4si) __B, 8909 (__v8si) 8910 _mm256_setzero_si256 (), 8911 (__mmask8) __U); 8912 } 8913 8914 extern __inline __m256i 8915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8916 _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 8917 __m128i __B) 8918 { 8919 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A, 8920 (__v2di) __B, 8921 (__v4di) __W, 8922 (__mmask8) __U); 8923 } 8924 8925 extern __inline __m256i 8926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8927 _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B) 8928 { 
8929 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A, 8930 (__v2di) __B, 8931 (__v4di) 8932 _mm256_setzero_si256 (), 8933 (__mmask8) __U); 8934 } 8935 8936 extern __inline __m256 8937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8938 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, 8939 __m256 __Y) 8940 { 8941 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8942 (__v8si) __X, 8943 (__v8sf) __W, 8944 (__mmask8) __U); 8945 } 8946 8947 extern __inline __m256 8948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8949 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) 8950 { 8951 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8952 (__v8si) __X, 8953 (__v8sf) 8954 _mm256_setzero_ps (), 8955 (__mmask8) __U); 8956 } 8957 8958 extern __inline __m256d 8959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8960 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8961 { 8962 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8963 (__v4di) __X, 8964 (__v4df) 8965 _mm256_setzero_pd (), 8966 (__mmask8) -1); 8967 } 8968 8969 extern __inline __m256d 8970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8971 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8972 __m256d __Y) 8973 { 8974 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8975 (__v4di) __X, 8976 (__v4df) __W, 8977 (__mmask8) __U); 8978 } 8979 8980 extern __inline __m256d 8981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8982 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8983 { 8984 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8985 (__v4di) __X, 8986 (__v4df) 8987 _mm256_setzero_pd (), 8988 (__mmask8) __U); 8989 } 8990 8991 extern __inline __m256d 8992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8993 _mm256_mask_permutevar_pd 
(__m256d __W, __mmask8 __U, __m256d __A, 8994 __m256i __C) 8995 { 8996 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A, 8997 (__v4di) __C, 8998 (__v4df) __W, 8999 (__mmask8) 9000 __U); 9001 } 9002 9003 extern __inline __m256d 9004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9005 _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C) 9006 { 9007 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A, 9008 (__v4di) __C, 9009 (__v4df) 9010 _mm256_setzero_pd (), 9011 (__mmask8) 9012 __U); 9013 } 9014 9015 extern __inline __m256 9016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9017 _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A, 9018 __m256i __C) 9019 { 9020 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A, 9021 (__v8si) __C, 9022 (__v8sf) __W, 9023 (__mmask8) __U); 9024 } 9025 9026 extern __inline __m256 9027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9028 _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C) 9029 { 9030 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A, 9031 (__v8si) __C, 9032 (__v8sf) 9033 _mm256_setzero_ps (), 9034 (__mmask8) __U); 9035 } 9036 9037 extern __inline __m128d 9038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9039 _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A, 9040 __m128i __C) 9041 { 9042 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A, 9043 (__v2di) __C, 9044 (__v2df) __W, 9045 (__mmask8) __U); 9046 } 9047 9048 extern __inline __m128d 9049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9050 _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C) 9051 { 9052 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A, 9053 (__v2di) __C, 9054 (__v2df) 9055 _mm_setzero_pd (), 9056 (__mmask8) __U); 9057 } 9058 9059 extern __inline __m128 9060 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 9061 _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A, 9062 __m128i __C) 9063 { 9064 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A, 9065 (__v4si) __C, 9066 (__v4sf) __W, 9067 (__mmask8) __U); 9068 } 9069 9070 extern __inline __m128 9071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9072 _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C) 9073 { 9074 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A, 9075 (__v4si) __C, 9076 (__v4sf) 9077 _mm_setzero_ps (), 9078 (__mmask8) __U); 9079 } 9080 9081 extern __inline __m256i 9082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9083 _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B) 9084 { 9085 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A, 9086 (__v8si) __B, 9087 (__v8si) 9088 _mm256_setzero_si256 (), 9089 __M); 9090 } 9091 9092 extern __inline __m256i 9093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9094 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 9095 { 9096 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 9097 (__v4di) __X, 9098 (__v4di) 9099 _mm256_setzero_si256 (), 9100 __M); 9101 } 9102 9103 extern __inline __m256i 9104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9105 _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A, 9106 __m256i __B) 9107 { 9108 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A, 9109 (__v8si) __B, 9110 (__v8si) __W, __M); 9111 } 9112 9113 extern __inline __m128i 9114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9115 _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B) 9116 { 9117 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A, 9118 (__v4si) __B, 9119 (__v4si) 9120 _mm_setzero_si128 (), 9121 __M); 9122 } 9123 9124 extern __inline __m128i 9125 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 9126 _mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A, 9127 __m128i __B) 9128 { 9129 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A, 9130 (__v4si) __B, 9131 (__v4si) __W, __M); 9132 } 9133 9134 extern __inline __m256i 9135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9136 _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, 9137 __m256i __Y) 9138 { 9139 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, 9140 (__v8si) __Y, 9141 (__v4di) __W, __M); 9142 } 9143 9144 extern __inline __m256i 9145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9146 _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) 9147 { 9148 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, 9149 (__v8si) __Y, 9150 (__v4di) 9151 _mm256_setzero_si256 (), 9152 __M); 9153 } 9154 9155 extern __inline __m128i 9156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9157 _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, 9158 __m128i __Y) 9159 { 9160 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, 9161 (__v4si) __Y, 9162 (__v2di) __W, __M); 9163 } 9164 9165 extern __inline __m128i 9166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9167 _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) 9168 { 9169 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, 9170 (__v4si) __Y, 9171 (__v2di) 9172 _mm_setzero_si128 (), 9173 __M); 9174 } 9175 9176 extern __inline __m256i 9177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9178 _mm256_permutexvar_epi64 (__m256i __X, __m256i __Y) 9179 { 9180 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 9181 (__v4di) __X, 9182 (__v4di) 9183 _mm256_setzero_si256 (), 9184 (__mmask8) -1); 9185 } 9186 9187 extern __inline __m256i 9188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9189 
_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 9190 __m256i __Y) 9191 { 9192 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 9193 (__v4di) __X, 9194 (__v4di) __W, 9195 __M); 9196 } 9197 9198 extern __inline __m256i 9199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9200 _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, 9201 __m256i __Y) 9202 { 9203 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, 9204 (__v8si) __Y, 9205 (__v4di) __W, __M); 9206 } 9207 9208 extern __inline __m256i 9209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9210 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) 9211 { 9212 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 9213 (__v8si) __X, 9214 (__v8si) 9215 _mm256_setzero_si256 (), 9216 __M); 9217 } 9218 9219 extern __inline __m256i 9220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9221 _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) 9222 { 9223 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, 9224 (__v8si) __Y, 9225 (__v4di) 9226 _mm256_setzero_si256 (), 9227 __M); 9228 } 9229 9230 extern __inline __m128i 9231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9232 _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, 9233 __m128i __Y) 9234 { 9235 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, 9236 (__v4si) __Y, 9237 (__v2di) __W, __M); 9238 } 9239 9240 extern __inline __m128i 9241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9242 _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) 9243 { 9244 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, 9245 (__v4si) __Y, 9246 (__v2di) 9247 _mm_setzero_si128 (), 9248 __M); 9249 } 9250 9251 extern __inline __m256i 9252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9253 _mm256_permutexvar_epi32 
(__m256i __X, __m256i __Y) 9254 { 9255 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 9256 (__v8si) __X, 9257 (__v8si) 9258 _mm256_setzero_si256 (), 9259 (__mmask8) -1); 9260 } 9261 9262 extern __inline __m256i 9263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9264 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, 9265 __m256i __Y) 9266 { 9267 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 9268 (__v8si) __X, 9269 (__v8si) __W, 9270 __M); 9271 } 9272 9273 extern __inline __mmask8 9274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9275 _mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9276 { 9277 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 9278 (__v8si) __Y, 4, 9279 (__mmask8) __M); 9280 } 9281 9282 extern __inline __mmask8 9283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9284 _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y) 9285 { 9286 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 9287 (__v8si) __Y, 4, 9288 (__mmask8) -1); 9289 } 9290 9291 extern __inline __mmask8 9292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9293 _mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9294 { 9295 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 9296 (__v8si) __Y, 1, 9297 (__mmask8) __M); 9298 } 9299 9300 extern __inline __mmask8 9301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9302 _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y) 9303 { 9304 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 9305 (__v8si) __Y, 1, 9306 (__mmask8) -1); 9307 } 9308 9309 extern __inline __mmask8 9310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9311 _mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9312 { 9313 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 9314 (__v8si) __Y, 
5, 9315 (__mmask8) __M); 9316 } 9317 9318 extern __inline __mmask8 9319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9320 _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y) 9321 { 9322 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 9323 (__v8si) __Y, 5, 9324 (__mmask8) -1); 9325 } 9326 9327 extern __inline __mmask8 9328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9329 _mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9330 { 9331 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 9332 (__v8si) __Y, 2, 9333 (__mmask8) __M); 9334 } 9335 9336 extern __inline __mmask8 9337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9338 _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y) 9339 { 9340 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 9341 (__v8si) __Y, 2, 9342 (__mmask8) -1); 9343 } 9344 9345 extern __inline __mmask8 9346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9347 _mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9348 { 9349 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 9350 (__v4di) __Y, 4, 9351 (__mmask8) __M); 9352 } 9353 9354 extern __inline __mmask8 9355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9356 _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y) 9357 { 9358 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 9359 (__v4di) __Y, 4, 9360 (__mmask8) -1); 9361 } 9362 9363 extern __inline __mmask8 9364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9365 _mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9366 { 9367 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 9368 (__v4di) __Y, 1, 9369 (__mmask8) __M); 9370 } 9371 9372 extern __inline __mmask8 9373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9374 _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y) 9375 { 9376 
return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 9377 (__v4di) __Y, 1, 9378 (__mmask8) -1); 9379 } 9380 9381 extern __inline __mmask8 9382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9383 _mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9384 { 9385 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 9386 (__v4di) __Y, 5, 9387 (__mmask8) __M); 9388 } 9389 9390 extern __inline __mmask8 9391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9392 _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y) 9393 { 9394 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 9395 (__v4di) __Y, 5, 9396 (__mmask8) -1); 9397 } 9398 9399 extern __inline __mmask8 9400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9401 _mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9402 { 9403 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 9404 (__v4di) __Y, 2, 9405 (__mmask8) __M); 9406 } 9407 9408 extern __inline __mmask8 9409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9410 _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y) 9411 { 9412 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 9413 (__v4di) __Y, 2, 9414 (__mmask8) -1); 9415 } 9416 9417 extern __inline __mmask8 9418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9419 _mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9420 { 9421 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 9422 (__v8si) __Y, 4, 9423 (__mmask8) __M); 9424 } 9425 9426 extern __inline __mmask8 9427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9428 _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y) 9429 { 9430 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 9431 (__v8si) __Y, 4, 9432 (__mmask8) -1); 9433 } 9434 9435 extern __inline __mmask8 9436 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 9437 _mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9438 { 9439 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 9440 (__v8si) __Y, 1, 9441 (__mmask8) __M); 9442 } 9443 9444 extern __inline __mmask8 9445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9446 _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y) 9447 { 9448 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 9449 (__v8si) __Y, 1, 9450 (__mmask8) -1); 9451 } 9452 9453 extern __inline __mmask8 9454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9455 _mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9456 { 9457 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 9458 (__v8si) __Y, 5, 9459 (__mmask8) __M); 9460 } 9461 9462 extern __inline __mmask8 9463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9464 _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y) 9465 { 9466 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 9467 (__v8si) __Y, 5, 9468 (__mmask8) -1); 9469 } 9470 9471 extern __inline __mmask8 9472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9473 _mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9474 { 9475 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 9476 (__v8si) __Y, 2, 9477 (__mmask8) __M); 9478 } 9479 9480 extern __inline __mmask8 9481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9482 _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y) 9483 { 9484 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 9485 (__v8si) __Y, 2, 9486 (__mmask8) -1); 9487 } 9488 9489 extern __inline __mmask8 9490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9491 _mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9492 { 9493 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, 9494 (__v4di) __Y, 4, 9495 (__mmask8) __M); 9496 } 
9497 9498 extern __inline __mmask8 9499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9500 _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y) 9501 { 9502 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, 9503 (__v4di) __Y, 4, 9504 (__mmask8) -1); 9505 } 9506 9507 extern __inline __mmask8 9508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9509 _mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9510 { 9511 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, 9512 (__v4di) __Y, 1, 9513 (__mmask8) __M); 9514 } 9515 9516 extern __inline __mmask8 9517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9518 _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y) 9519 { 9520 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, 9521 (__v4di) __Y, 1, 9522 (__mmask8) -1); 9523 } 9524 9525 extern __inline __mmask8 9526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9527 _mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9528 { 9529 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, 9530 (__v4di) __Y, 5, 9531 (__mmask8) __M); 9532 } 9533 9534 extern __inline __mmask8 9535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9536 _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y) 9537 { 9538 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, 9539 (__v4di) __Y, 5, 9540 (__mmask8) -1); 9541 } 9542 9543 extern __inline __mmask8 9544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9545 _mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y) 9546 { 9547 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, 9548 (__v4di) __Y, 2, 9549 (__mmask8) __M); 9550 } 9551 9552 extern __inline __mmask8 9553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9554 _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y) 9555 { 9556 return (__mmask8) 
__builtin_ia32_cmpq256_mask ((__v4di) __X, 9557 (__v4di) __Y, 2, 9558 (__mmask8) -1); 9559 } 9560 9561 extern __inline __mmask8 9562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9563 _mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9564 { 9565 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 9566 (__v4si) __Y, 4, 9567 (__mmask8) __M); 9568 } 9569 9570 extern __inline __mmask8 9571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9572 _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y) 9573 { 9574 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 9575 (__v4si) __Y, 4, 9576 (__mmask8) -1); 9577 } 9578 9579 extern __inline __mmask8 9580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9581 _mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9582 { 9583 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 9584 (__v4si) __Y, 1, 9585 (__mmask8) __M); 9586 } 9587 9588 extern __inline __mmask8 9589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9590 _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y) 9591 { 9592 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 9593 (__v4si) __Y, 1, 9594 (__mmask8) -1); 9595 } 9596 9597 extern __inline __mmask8 9598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9599 _mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9600 { 9601 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 9602 (__v4si) __Y, 5, 9603 (__mmask8) __M); 9604 } 9605 9606 extern __inline __mmask8 9607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9608 _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y) 9609 { 9610 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 9611 (__v4si) __Y, 5, 9612 (__mmask8) -1); 9613 } 9614 9615 extern __inline __mmask8 9616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9617 
_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9618 { 9619 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 9620 (__v4si) __Y, 2, 9621 (__mmask8) __M); 9622 } 9623 9624 extern __inline __mmask8 9625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9626 _mm_cmple_epu32_mask (__m128i __X, __m128i __Y) 9627 { 9628 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 9629 (__v4si) __Y, 2, 9630 (__mmask8) -1); 9631 } 9632 9633 extern __inline __mmask8 9634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9635 _mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9636 { 9637 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, 9638 (__v2di) __Y, 4, 9639 (__mmask8) __M); 9640 } 9641 9642 extern __inline __mmask8 9643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9644 _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y) 9645 { 9646 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, 9647 (__v2di) __Y, 4, 9648 (__mmask8) -1); 9649 } 9650 9651 extern __inline __mmask8 9652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9653 _mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9654 { 9655 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, 9656 (__v2di) __Y, 1, 9657 (__mmask8) __M); 9658 } 9659 9660 extern __inline __mmask8 9661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9662 _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y) 9663 { 9664 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, 9665 (__v2di) __Y, 1, 9666 (__mmask8) -1); 9667 } 9668 9669 extern __inline __mmask8 9670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9671 _mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9672 { 9673 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, 9674 (__v2di) __Y, 5, 9675 (__mmask8) __M); 9676 } 9677 9678 extern __inline __mmask8 
9679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9680 _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y) 9681 { 9682 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, 9683 (__v2di) __Y, 5, 9684 (__mmask8) -1); 9685 } 9686 9687 extern __inline __mmask8 9688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9689 _mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9690 { 9691 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, 9692 (__v2di) __Y, 2, 9693 (__mmask8) __M); 9694 } 9695 9696 extern __inline __mmask8 9697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9698 _mm_cmple_epu64_mask (__m128i __X, __m128i __Y) 9699 { 9700 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, 9701 (__v2di) __Y, 2, 9702 (__mmask8) -1); 9703 } 9704 9705 extern __inline __mmask8 9706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9707 _mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9708 { 9709 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 9710 (__v4si) __Y, 4, 9711 (__mmask8) __M); 9712 } 9713 9714 extern __inline __mmask8 9715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9716 _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y) 9717 { 9718 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 9719 (__v4si) __Y, 4, 9720 (__mmask8) -1); 9721 } 9722 9723 extern __inline __mmask8 9724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9725 _mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9726 { 9727 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 9728 (__v4si) __Y, 1, 9729 (__mmask8) __M); 9730 } 9731 9732 extern __inline __mmask8 9733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9734 _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y) 9735 { 9736 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 9737 (__v4si) __Y, 1, 9738 
(__mmask8) -1); 9739 } 9740 9741 extern __inline __mmask8 9742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9743 _mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9744 { 9745 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 9746 (__v4si) __Y, 5, 9747 (__mmask8) __M); 9748 } 9749 9750 extern __inline __mmask8 9751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9752 _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y) 9753 { 9754 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 9755 (__v4si) __Y, 5, 9756 (__mmask8) -1); 9757 } 9758 9759 extern __inline __mmask8 9760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9761 _mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9762 { 9763 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 9764 (__v4si) __Y, 2, 9765 (__mmask8) __M); 9766 } 9767 9768 extern __inline __mmask8 9769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9770 _mm_cmple_epi32_mask (__m128i __X, __m128i __Y) 9771 { 9772 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 9773 (__v4si) __Y, 2, 9774 (__mmask8) -1); 9775 } 9776 9777 extern __inline __mmask8 9778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9779 _mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9780 { 9781 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, 9782 (__v2di) __Y, 4, 9783 (__mmask8) __M); 9784 } 9785 9786 extern __inline __mmask8 9787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9788 _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y) 9789 { 9790 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, 9791 (__v2di) __Y, 4, 9792 (__mmask8) -1); 9793 } 9794 9795 extern __inline __mmask8 9796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9797 _mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9798 { 9799 return (__mmask8) 
__builtin_ia32_cmpq128_mask ((__v2di) __X, 9800 (__v2di) __Y, 1, 9801 (__mmask8) __M); 9802 } 9803 9804 extern __inline __mmask8 9805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9806 _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y) 9807 { 9808 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, 9809 (__v2di) __Y, 1, 9810 (__mmask8) -1); 9811 } 9812 9813 extern __inline __mmask8 9814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9815 _mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9816 { 9817 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, 9818 (__v2di) __Y, 5, 9819 (__mmask8) __M); 9820 } 9821 9822 extern __inline __mmask8 9823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9824 _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y) 9825 { 9826 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, 9827 (__v2di) __Y, 5, 9828 (__mmask8) -1); 9829 } 9830 9831 extern __inline __mmask8 9832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9833 _mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y) 9834 { 9835 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, 9836 (__v2di) __Y, 2, 9837 (__mmask8) __M); 9838 } 9839 9840 extern __inline __mmask8 9841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9842 _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) 9843 { 9844 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, 9845 (__v2di) __Y, 2, 9846 (__mmask8) -1); 9847 } 9848 9849 #ifdef __OPTIMIZE__ 9850 extern __inline __m256i 9851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9852 _mm256_permutex_epi64 (__m256i __X, const int __I) 9853 { 9854 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X, 9855 __I, 9856 (__v4di) 9857 _mm256_setzero_si256(), 9858 (__mmask8) -1); 9859 } 9860 9861 extern __inline __m256i 9862 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 9863 _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M, 9864 __m256i __X, const int __I) 9865 { 9866 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X, 9867 __I, 9868 (__v4di) __W, 9869 (__mmask8) __M); 9870 } 9871 9872 extern __inline __m256i 9873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9874 _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I) 9875 { 9876 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X, 9877 __I, 9878 (__v4di) 9879 _mm256_setzero_si256 (), 9880 (__mmask8) __M); 9881 } 9882 9883 extern __inline __m256d 9884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9885 _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A, 9886 __m256d __B, const int __imm) 9887 { 9888 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A, 9889 (__v4df) __B, __imm, 9890 (__v4df) __W, 9891 (__mmask8) __U); 9892 } 9893 9894 extern __inline __m256d 9895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9896 _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B, 9897 const int __imm) 9898 { 9899 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A, 9900 (__v4df) __B, __imm, 9901 (__v4df) 9902 _mm256_setzero_pd (), 9903 (__mmask8) __U); 9904 } 9905 9906 extern __inline __m128d 9907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9908 _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A, 9909 __m128d __B, const int __imm) 9910 { 9911 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A, 9912 (__v2df) __B, __imm, 9913 (__v2df) __W, 9914 (__mmask8) __U); 9915 } 9916 9917 extern __inline __m128d 9918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9919 _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B, 9920 const int __imm) 9921 { 9922 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A, 9923 (__v2df) __B, __imm, 9924 (__v2df) 9925 _mm_setzero_pd (), 
9926 (__mmask8) __U); 9927 } 9928 9929 extern __inline __m256 9930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9931 _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A, 9932 __m256 __B, const int __imm) 9933 { 9934 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A, 9935 (__v8sf) __B, __imm, 9936 (__v8sf) __W, 9937 (__mmask8) __U); 9938 } 9939 9940 extern __inline __m256 9941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9942 _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B, 9943 const int __imm) 9944 { 9945 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A, 9946 (__v8sf) __B, __imm, 9947 (__v8sf) 9948 _mm256_setzero_ps (), 9949 (__mmask8) __U); 9950 } 9951 9952 extern __inline __m128 9953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9954 _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 9955 const int __imm) 9956 { 9957 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A, 9958 (__v4sf) __B, __imm, 9959 (__v4sf) __W, 9960 (__mmask8) __U); 9961 } 9962 9963 extern __inline __m128 9964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9965 _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B, 9966 const int __imm) 9967 { 9968 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A, 9969 (__v4sf) __B, __imm, 9970 (__v4sf) 9971 _mm_setzero_ps (), 9972 (__mmask8) __U); 9973 } 9974 9975 extern __inline __m256i 9976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9977 _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm) 9978 { 9979 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A, 9980 (__v4si) __B, 9981 __imm, 9982 (__v8si) 9983 _mm256_setzero_si256 (), 9984 (__mmask8) -1); 9985 } 9986 9987 extern __inline __m256i 9988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9989 _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A, 9990 
__m128i __B, const int __imm) 9991 { 9992 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A, 9993 (__v4si) __B, 9994 __imm, 9995 (__v8si) __W, 9996 (__mmask8) 9997 __U); 9998 } 9999 10000 extern __inline __m256i 10001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10002 _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B, 10003 const int __imm) 10004 { 10005 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A, 10006 (__v4si) __B, 10007 __imm, 10008 (__v8si) 10009 _mm256_setzero_si256 (), 10010 (__mmask8) 10011 __U); 10012 } 10013 10014 extern __inline __m256 10015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10016 _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm) 10017 { 10018 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A, 10019 (__v4sf) __B, 10020 __imm, 10021 (__v8sf) 10022 _mm256_setzero_ps (), 10023 (__mmask8) -1); 10024 } 10025 10026 extern __inline __m256 10027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10028 _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A, 10029 __m128 __B, const int __imm) 10030 { 10031 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A, 10032 (__v4sf) __B, 10033 __imm, 10034 (__v8sf) __W, 10035 (__mmask8) __U); 10036 } 10037 10038 extern __inline __m256 10039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10040 _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B, 10041 const int __imm) 10042 { 10043 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A, 10044 (__v4sf) __B, 10045 __imm, 10046 (__v8sf) 10047 _mm256_setzero_ps (), 10048 (__mmask8) __U); 10049 } 10050 10051 extern __inline __m128i 10052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10053 _mm256_extracti32x4_epi32 (__m256i __A, const int __imm) 10054 { 10055 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A, 10056 __imm, 
10057 (__v4si) 10058 _mm_setzero_si128 (), 10059 (__mmask8) -1); 10060 } 10061 10062 extern __inline __m128i 10063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10064 _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A, 10065 const int __imm) 10066 { 10067 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A, 10068 __imm, 10069 (__v4si) __W, 10070 (__mmask8) 10071 __U); 10072 } 10073 10074 extern __inline __m128i 10075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10076 _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A, 10077 const int __imm) 10078 { 10079 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A, 10080 __imm, 10081 (__v4si) 10082 _mm_setzero_si128 (), 10083 (__mmask8) 10084 __U); 10085 } 10086 10087 extern __inline __m128 10088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10089 _mm256_extractf32x4_ps (__m256 __A, const int __imm) 10090 { 10091 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A, 10092 __imm, 10093 (__v4sf) 10094 _mm_setzero_ps (), 10095 (__mmask8) -1); 10096 } 10097 10098 extern __inline __m128 10099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10100 _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A, 10101 const int __imm) 10102 { 10103 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A, 10104 __imm, 10105 (__v4sf) __W, 10106 (__mmask8) 10107 __U); 10108 } 10109 10110 extern __inline __m128 10111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10112 _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A, 10113 const int __imm) 10114 { 10115 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A, 10116 __imm, 10117 (__v4sf) 10118 _mm_setzero_ps (), 10119 (__mmask8) 10120 __U); 10121 } 10122 10123 extern __inline __m256i 10124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10125 
_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm) 10126 { 10127 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A, 10128 (__v4di) __B, 10129 __imm, 10130 (__v4di) 10131 _mm256_setzero_si256 (), 10132 (__mmask8) -1); 10133 } 10134 10135 extern __inline __m256i 10136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10137 _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A, 10138 __m256i __B, const int __imm) 10139 { 10140 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A, 10141 (__v4di) __B, 10142 __imm, 10143 (__v4di) __W, 10144 (__mmask8) __U); 10145 } 10146 10147 extern __inline __m256i 10148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10149 _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B, 10150 const int __imm) 10151 { 10152 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A, 10153 (__v4di) __B, 10154 __imm, 10155 (__v4di) 10156 _mm256_setzero_si256 (), 10157 (__mmask8) __U); 10158 } 10159 10160 extern __inline __m256i 10161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10162 _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm) 10163 { 10164 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A, 10165 (__v8si) __B, 10166 __imm, 10167 (__v8si) 10168 _mm256_setzero_si256 (), 10169 (__mmask8) -1); 10170 } 10171 10172 extern __inline __m256i 10173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10174 _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A, 10175 __m256i __B, const int __imm) 10176 { 10177 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A, 10178 (__v8si) __B, 10179 __imm, 10180 (__v8si) __W, 10181 (__mmask8) __U); 10182 } 10183 10184 extern __inline __m256i 10185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10186 _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B, 10187 const int __imm) 
10188 { 10189 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A, 10190 (__v8si) __B, 10191 __imm, 10192 (__v8si) 10193 _mm256_setzero_si256 (), 10194 (__mmask8) __U); 10195 } 10196 10197 extern __inline __m256d 10198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10199 _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm) 10200 { 10201 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A, 10202 (__v4df) __B, 10203 __imm, 10204 (__v4df) 10205 _mm256_setzero_pd (), 10206 (__mmask8) -1); 10207 } 10208 10209 extern __inline __m256d 10210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10211 _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A, 10212 __m256d __B, const int __imm) 10213 { 10214 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A, 10215 (__v4df) __B, 10216 __imm, 10217 (__v4df) __W, 10218 (__mmask8) __U); 10219 } 10220 10221 extern __inline __m256d 10222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10223 _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B, 10224 const int __imm) 10225 { 10226 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A, 10227 (__v4df) __B, 10228 __imm, 10229 (__v4df) 10230 _mm256_setzero_pd (), 10231 (__mmask8) __U); 10232 } 10233 10234 extern __inline __m256 10235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10236 _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm) 10237 { 10238 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A, 10239 (__v8sf) __B, 10240 __imm, 10241 (__v8sf) 10242 _mm256_setzero_ps (), 10243 (__mmask8) -1); 10244 } 10245 10246 extern __inline __m256 10247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10248 _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A, 10249 __m256 __B, const int __imm) 10250 { 10251 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A, 
10252 (__v8sf) __B, 10253 __imm, 10254 (__v8sf) __W, 10255 (__mmask8) __U); 10256 } 10257 10258 extern __inline __m256 10259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10260 _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B, 10261 const int __imm) 10262 { 10263 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A, 10264 (__v8sf) __B, 10265 __imm, 10266 (__v8sf) 10267 _mm256_setzero_ps (), 10268 (__mmask8) __U); 10269 } 10270 10271 extern __inline __m256d 10272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10273 _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C, 10274 const int __imm) 10275 { 10276 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A, 10277 (__v4df) __B, 10278 (__v4di) __C, 10279 __imm, 10280 (__mmask8) -1); 10281 } 10282 10283 extern __inline __m256d 10284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10285 _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B, 10286 __m256i __C, const int __imm) 10287 { 10288 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A, 10289 (__v4df) __B, 10290 (__v4di) __C, 10291 __imm, 10292 (__mmask8) __U); 10293 } 10294 10295 extern __inline __m256d 10296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10297 _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B, 10298 __m256i __C, const int __imm) 10299 { 10300 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A, 10301 (__v4df) __B, 10302 (__v4di) __C, 10303 __imm, 10304 (__mmask8) __U); 10305 } 10306 10307 extern __inline __m256 10308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10309 _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C, 10310 const int __imm) 10311 { 10312 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A, 10313 (__v8sf) __B, 10314 (__v8si) __C, 10315 __imm, 10316 (__mmask8) -1); 10317 } 10318 10319 extern __inline __m256 10320 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10321 _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B, 10322 __m256i __C, const int __imm) 10323 { 10324 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A, 10325 (__v8sf) __B, 10326 (__v8si) __C, 10327 __imm, 10328 (__mmask8) __U); 10329 } 10330 10331 extern __inline __m256 10332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10333 _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B, 10334 __m256i __C, const int __imm) 10335 { 10336 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A, 10337 (__v8sf) __B, 10338 (__v8si) __C, 10339 __imm, 10340 (__mmask8) __U); 10341 } 10342 10343 extern __inline __m128d 10344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10345 _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C, 10346 const int __imm) 10347 { 10348 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A, 10349 (__v2df) __B, 10350 (__v2di) __C, 10351 __imm, 10352 (__mmask8) -1); 10353 } 10354 10355 extern __inline __m128d 10356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10357 _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B, 10358 __m128i __C, const int __imm) 10359 { 10360 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A, 10361 (__v2df) __B, 10362 (__v2di) __C, 10363 __imm, 10364 (__mmask8) __U); 10365 } 10366 10367 extern __inline __m128d 10368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10369 _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B, 10370 __m128i __C, const int __imm) 10371 { 10372 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A, 10373 (__v2df) __B, 10374 (__v2di) __C, 10375 __imm, 10376 (__mmask8) __U); 10377 } 10378 10379 extern __inline __m128 10380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10381 _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, 
const int __imm) 10382 { 10383 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A, 10384 (__v4sf) __B, 10385 (__v4si) __C, 10386 __imm, 10387 (__mmask8) -1); 10388 } 10389 10390 extern __inline __m128 10391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10392 _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B, 10393 __m128i __C, const int __imm) 10394 { 10395 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A, 10396 (__v4sf) __B, 10397 (__v4si) __C, 10398 __imm, 10399 (__mmask8) __U); 10400 } 10401 10402 extern __inline __m128 10403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10404 _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B, 10405 __m128i __C, const int __imm) 10406 { 10407 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A, 10408 (__v4sf) __B, 10409 (__v4si) __C, 10410 __imm, 10411 (__mmask8) __U); 10412 } 10413 10414 extern __inline __m256i 10415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10416 _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 10417 const int __imm) 10418 { 10419 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm, 10420 (__v8si) __W, 10421 (__mmask8) __U); 10422 } 10423 10424 extern __inline __m256i 10425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10426 _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm) 10427 { 10428 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm, 10429 (__v8si) 10430 _mm256_setzero_si256 (), 10431 (__mmask8) __U); 10432 } 10433 10434 extern __inline __m128i 10435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10436 _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 10437 const int __imm) 10438 { 10439 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm, 10440 (__v4si) __W, 10441 (__mmask8) __U); 10442 } 10443 10444 extern __inline __m128i 10445 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10446 _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm) 10447 { 10448 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm, 10449 (__v4si) 10450 _mm_setzero_si128 (), 10451 (__mmask8) __U); 10452 } 10453 10454 extern __inline __m256i 10455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10456 _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 10457 const int __imm) 10458 { 10459 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm, 10460 (__v4di) __W, 10461 (__mmask8) __U); 10462 } 10463 10464 extern __inline __m256i 10465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10466 _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm) 10467 { 10468 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm, 10469 (__v4di) 10470 _mm256_setzero_si256 (), 10471 (__mmask8) __U); 10472 } 10473 10474 extern __inline __m128i 10475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10476 _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 10477 const int __imm) 10478 { 10479 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm, 10480 (__v2di) __W, 10481 (__mmask8) __U); 10482 } 10483 10484 extern __inline __m128i 10485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10486 _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm) 10487 { 10488 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm, 10489 (__v2di) 10490 _mm_setzero_si128 (), 10491 (__mmask8) __U); 10492 } 10493 10494 extern __inline __m256i 10495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10496 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C, 10497 const int __imm) 10498 { 10499 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, 10500 (__v4di) __B, 10501 (__v4di) __C, __imm, 10502 (__mmask8) 
-1); 10503 } 10504 10505 extern __inline __m256i 10506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10507 _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U, 10508 __m256i __B, __m256i __C, 10509 const int __imm) 10510 { 10511 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, 10512 (__v4di) __B, 10513 (__v4di) __C, __imm, 10514 (__mmask8) __U); 10515 } 10516 10517 extern __inline __m256i 10518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10519 _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A, 10520 __m256i __B, __m256i __C, 10521 const int __imm) 10522 { 10523 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A, 10524 (__v4di) __B, 10525 (__v4di) __C, 10526 __imm, 10527 (__mmask8) __U); 10528 } 10529 10530 extern __inline __m256i 10531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10532 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C, 10533 const int __imm) 10534 { 10535 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, 10536 (__v8si) __B, 10537 (__v8si) __C, __imm, 10538 (__mmask8) -1); 10539 } 10540 10541 extern __inline __m256i 10542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10543 _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U, 10544 __m256i __B, __m256i __C, 10545 const int __imm) 10546 { 10547 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, 10548 (__v8si) __B, 10549 (__v8si) __C, __imm, 10550 (__mmask8) __U); 10551 } 10552 10553 extern __inline __m256i 10554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10555 _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A, 10556 __m256i __B, __m256i __C, 10557 const int __imm) 10558 { 10559 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A, 10560 (__v8si) __B, 10561 (__v8si) __C, 10562 __imm, 10563 (__mmask8) __U); 10564 } 10565 10566 extern __inline __m128i 10567 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 10568 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C, 10569 const int __imm) 10570 { 10571 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, 10572 (__v2di) __B, 10573 (__v2di) __C, __imm, 10574 (__mmask8) -1); 10575 } 10576 10577 extern __inline __m128i 10578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10579 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U, 10580 __m128i __B, __m128i __C, const int __imm) 10581 { 10582 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, 10583 (__v2di) __B, 10584 (__v2di) __C, __imm, 10585 (__mmask8) __U); 10586 } 10587 10588 extern __inline __m128i 10589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10590 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A, 10591 __m128i __B, __m128i __C, const int __imm) 10592 { 10593 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A, 10594 (__v2di) __B, 10595 (__v2di) __C, 10596 __imm, 10597 (__mmask8) __U); 10598 } 10599 10600 extern __inline __m128i 10601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10602 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C, 10603 const int __imm) 10604 { 10605 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, 10606 (__v4si) __B, 10607 (__v4si) __C, __imm, 10608 (__mmask8) -1); 10609 } 10610 10611 extern __inline __m128i 10612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10613 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U, 10614 __m128i __B, __m128i __C, const int __imm) 10615 { 10616 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, 10617 (__v4si) __B, 10618 (__v4si) __C, __imm, 10619 (__mmask8) __U); 10620 } 10621 10622 extern __inline __m128i 10623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10624 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A, 10625 __m128i __B, 
__m128i __C, const int __imm) 10626 { 10627 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A, 10628 (__v4si) __B, 10629 (__v4si) __C, 10630 __imm, 10631 (__mmask8) __U); 10632 } 10633 10634 extern __inline __m256 10635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10636 _mm256_roundscale_ps (__m256 __A, const int __imm) 10637 { 10638 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, 10639 __imm, 10640 (__v8sf) 10641 _mm256_setzero_ps (), 10642 (__mmask8) -1); 10643 } 10644 10645 extern __inline __m256 10646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10647 _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A, 10648 const int __imm) 10649 { 10650 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, 10651 __imm, 10652 (__v8sf) __W, 10653 (__mmask8) __U); 10654 } 10655 10656 extern __inline __m256 10657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10658 _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm) 10659 { 10660 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, 10661 __imm, 10662 (__v8sf) 10663 _mm256_setzero_ps (), 10664 (__mmask8) __U); 10665 } 10666 10667 extern __inline __m256d 10668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10669 _mm256_roundscale_pd (__m256d __A, const int __imm) 10670 { 10671 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, 10672 __imm, 10673 (__v4df) 10674 _mm256_setzero_pd (), 10675 (__mmask8) -1); 10676 } 10677 10678 extern __inline __m256d 10679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10680 _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A, 10681 const int __imm) 10682 { 10683 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, 10684 __imm, 10685 (__v4df) __W, 10686 (__mmask8) __U); 10687 } 10688 10689 extern __inline __m256d 10690 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 10691 _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm) 10692 { 10693 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, 10694 __imm, 10695 (__v4df) 10696 _mm256_setzero_pd (), 10697 (__mmask8) __U); 10698 } 10699 10700 extern __inline __m128 10701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10702 _mm_roundscale_ps (__m128 __A, const int __imm) 10703 { 10704 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, 10705 __imm, 10706 (__v4sf) 10707 _mm_setzero_ps (), 10708 (__mmask8) -1); 10709 } 10710 10711 extern __inline __m128 10712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10713 _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A, 10714 const int __imm) 10715 { 10716 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, 10717 __imm, 10718 (__v4sf) __W, 10719 (__mmask8) __U); 10720 } 10721 10722 extern __inline __m128 10723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10724 _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm) 10725 { 10726 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, 10727 __imm, 10728 (__v4sf) 10729 _mm_setzero_ps (), 10730 (__mmask8) __U); 10731 } 10732 10733 extern __inline __m128d 10734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10735 _mm_roundscale_pd (__m128d __A, const int __imm) 10736 { 10737 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, 10738 __imm, 10739 (__v2df) 10740 _mm_setzero_pd (), 10741 (__mmask8) -1); 10742 } 10743 10744 extern __inline __m128d 10745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10746 _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A, 10747 const int __imm) 10748 { 10749 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, 10750 __imm, 10751 (__v2df) __W, 10752 (__mmask8) __U); 10753 } 10754 10755 extern 
__inline __m128d 10756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10757 _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm) 10758 { 10759 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, 10760 __imm, 10761 (__v2df) 10762 _mm_setzero_pd (), 10763 (__mmask8) __U); 10764 } 10765 10766 extern __inline __m256 10767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10768 _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B, 10769 _MM_MANTISSA_SIGN_ENUM __C) 10770 { 10771 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A, 10772 (__C << 2) | __B, 10773 (__v8sf) 10774 _mm256_setzero_ps (), 10775 (__mmask8) -1); 10776 } 10777 10778 extern __inline __m256 10779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10780 _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A, 10781 _MM_MANTISSA_NORM_ENUM __B, 10782 _MM_MANTISSA_SIGN_ENUM __C) 10783 { 10784 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A, 10785 (__C << 2) | __B, 10786 (__v8sf) __W, 10787 (__mmask8) __U); 10788 } 10789 10790 extern __inline __m256 10791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10792 _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A, 10793 _MM_MANTISSA_NORM_ENUM __B, 10794 _MM_MANTISSA_SIGN_ENUM __C) 10795 { 10796 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A, 10797 (__C << 2) | __B, 10798 (__v8sf) 10799 _mm256_setzero_ps (), 10800 (__mmask8) __U); 10801 } 10802 10803 extern __inline __m128 10804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10805 _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B, 10806 _MM_MANTISSA_SIGN_ENUM __C) 10807 { 10808 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A, 10809 (__C << 2) | __B, 10810 (__v4sf) 10811 _mm_setzero_ps (), 10812 (__mmask8) -1); 10813 } 10814 10815 extern __inline __m128 10816 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 10817 _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A, 10818 _MM_MANTISSA_NORM_ENUM __B, 10819 _MM_MANTISSA_SIGN_ENUM __C) 10820 { 10821 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A, 10822 (__C << 2) | __B, 10823 (__v4sf) __W, 10824 (__mmask8) __U); 10825 } 10826 10827 extern __inline __m128 10828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10829 _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A, 10830 _MM_MANTISSA_NORM_ENUM __B, 10831 _MM_MANTISSA_SIGN_ENUM __C) 10832 { 10833 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A, 10834 (__C << 2) | __B, 10835 (__v4sf) 10836 _mm_setzero_ps (), 10837 (__mmask8) __U); 10838 } 10839 10840 extern __inline __m256d 10841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10842 _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B, 10843 _MM_MANTISSA_SIGN_ENUM __C) 10844 { 10845 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A, 10846 (__C << 2) | __B, 10847 (__v4df) 10848 _mm256_setzero_pd (), 10849 (__mmask8) -1); 10850 } 10851 10852 extern __inline __m256d 10853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10854 _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A, 10855 _MM_MANTISSA_NORM_ENUM __B, 10856 _MM_MANTISSA_SIGN_ENUM __C) 10857 { 10858 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A, 10859 (__C << 2) | __B, 10860 (__v4df) __W, 10861 (__mmask8) __U); 10862 } 10863 10864 extern __inline __m256d 10865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10866 _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A, 10867 _MM_MANTISSA_NORM_ENUM __B, 10868 _MM_MANTISSA_SIGN_ENUM __C) 10869 { 10870 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A, 10871 (__C << 2) | __B, 10872 (__v4df) 10873 _mm256_setzero_pd (), 10874 (__mmask8) __U); 10875 } 10876 10877 extern __inline __m128d 10878 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 10879 _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B, 10880 _MM_MANTISSA_SIGN_ENUM __C) 10881 { 10882 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A, 10883 (__C << 2) | __B, 10884 (__v2df) 10885 _mm_setzero_pd (), 10886 (__mmask8) -1); 10887 } 10888 10889 extern __inline __m128d 10890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10891 _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A, 10892 _MM_MANTISSA_NORM_ENUM __B, 10893 _MM_MANTISSA_SIGN_ENUM __C) 10894 { 10895 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A, 10896 (__C << 2) | __B, 10897 (__v2df) __W, 10898 (__mmask8) __U); 10899 } 10900 10901 extern __inline __m128d 10902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10903 _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A, 10904 _MM_MANTISSA_NORM_ENUM __B, 10905 _MM_MANTISSA_SIGN_ENUM __C) 10906 { 10907 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A, 10908 (__C << 2) | __B, 10909 (__v2df) 10910 _mm_setzero_pd (), 10911 (__mmask8) __U); 10912 } 10913 10914 extern __inline __m256 10915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10916 _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask, 10917 __m256i __index, void const *__addr, 10918 int __scale) 10919 { 10920 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old, 10921 __addr, 10922 (__v8si) __index, 10923 __mask, __scale); 10924 } 10925 10926 extern __inline __m128 10927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10928 _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask, 10929 __m128i __index, void const *__addr, 10930 int __scale) 10931 { 10932 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old, 10933 __addr, 10934 (__v4si) __index, 10935 __mask, __scale); 10936 } 10937 10938 extern __inline __m256d 10939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10940 
_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask, 10941 __m128i __index, void const *__addr, 10942 int __scale) 10943 { 10944 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old, 10945 __addr, 10946 (__v4si) __index, 10947 __mask, __scale); 10948 } 10949 10950 extern __inline __m128d 10951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10952 _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask, 10953 __m128i __index, void const *__addr, 10954 int __scale) 10955 { 10956 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old, 10957 __addr, 10958 (__v4si) __index, 10959 __mask, __scale); 10960 } 10961 10962 extern __inline __m128 10963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10964 _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask, 10965 __m256i __index, void const *__addr, 10966 int __scale) 10967 { 10968 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old, 10969 __addr, 10970 (__v4di) __index, 10971 __mask, __scale); 10972 } 10973 10974 extern __inline __m128 10975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10976 _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask, 10977 __m128i __index, void const *__addr, 10978 int __scale) 10979 { 10980 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old, 10981 __addr, 10982 (__v2di) __index, 10983 __mask, __scale); 10984 } 10985 10986 extern __inline __m256d 10987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10988 _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask, 10989 __m256i __index, void const *__addr, 10990 int __scale) 10991 { 10992 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old, 10993 __addr, 10994 (__v4di) __index, 10995 __mask, __scale); 10996 } 10997 10998 extern __inline __m128d 10999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11000 _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask, 
11001 __m128i __index, void const *__addr, 11002 int __scale) 11003 { 11004 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old, 11005 __addr, 11006 (__v2di) __index, 11007 __mask, __scale); 11008 } 11009 11010 extern __inline __m256i 11011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11012 _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask, 11013 __m256i __index, void const *__addr, 11014 int __scale) 11015 { 11016 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old, 11017 __addr, 11018 (__v8si) __index, 11019 __mask, __scale); 11020 } 11021 11022 extern __inline __m128i 11023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11024 _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask, 11025 __m128i __index, void const *__addr, 11026 int __scale) 11027 { 11028 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old, 11029 __addr, 11030 (__v4si) __index, 11031 __mask, __scale); 11032 } 11033 11034 extern __inline __m256i 11035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11036 _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask, 11037 __m128i __index, void const *__addr, 11038 int __scale) 11039 { 11040 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old, 11041 __addr, 11042 (__v4si) __index, 11043 __mask, __scale); 11044 } 11045 11046 extern __inline __m128i 11047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11048 _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask, 11049 __m128i __index, void const *__addr, 11050 int __scale) 11051 { 11052 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old, 11053 __addr, 11054 (__v4si) __index, 11055 __mask, __scale); 11056 } 11057 11058 extern __inline __m128i 11059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11060 _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask, 11061 __m256i __index, void const 
*__addr, 11062 int __scale) 11063 { 11064 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old, 11065 __addr, 11066 (__v4di) __index, 11067 __mask, __scale); 11068 } 11069 11070 extern __inline __m128i 11071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11072 _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask, 11073 __m128i __index, void const *__addr, 11074 int __scale) 11075 { 11076 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old, 11077 __addr, 11078 (__v2di) __index, 11079 __mask, __scale); 11080 } 11081 11082 extern __inline __m256i 11083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11084 _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask, 11085 __m256i __index, void const *__addr, 11086 int __scale) 11087 { 11088 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old, 11089 __addr, 11090 (__v4di) __index, 11091 __mask, __scale); 11092 } 11093 11094 extern __inline __m128i 11095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11096 _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask, 11097 __m128i __index, void const *__addr, 11098 int __scale) 11099 { 11100 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old, 11101 __addr, 11102 (__v2di) __index, 11103 __mask, __scale); 11104 } 11105 11106 extern __inline void 11107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11108 _mm256_i32scatter_ps (void *__addr, __m256i __index, 11109 __m256 __v1, const int __scale) 11110 { 11111 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF, 11112 (__v8si) __index, (__v8sf) __v1, 11113 __scale); 11114 } 11115 11116 extern __inline void 11117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11118 _mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask, 11119 __m256i __index, __m256 __v1, 11120 const int __scale) 11121 { 11122 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index, 
11123 (__v8sf) __v1, __scale); 11124 } 11125 11126 extern __inline void 11127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11128 _mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1, 11129 const int __scale) 11130 { 11131 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF, 11132 (__v4si) __index, (__v4sf) __v1, 11133 __scale); 11134 } 11135 11136 extern __inline void 11137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11138 _mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask, 11139 __m128i __index, __m128 __v1, 11140 const int __scale) 11141 { 11142 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index, 11143 (__v4sf) __v1, __scale); 11144 } 11145 11146 extern __inline void 11147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11148 _mm256_i32scatter_pd (void *__addr, __m128i __index, 11149 __m256d __v1, const int __scale) 11150 { 11151 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF, 11152 (__v4si) __index, (__v4df) __v1, 11153 __scale); 11154 } 11155 11156 extern __inline void 11157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11158 _mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask, 11159 __m128i __index, __m256d __v1, 11160 const int __scale) 11161 { 11162 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index, 11163 (__v4df) __v1, __scale); 11164 } 11165 11166 extern __inline void 11167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11168 _mm_i32scatter_pd (void *__addr, __m128i __index, 11169 __m128d __v1, const int __scale) 11170 { 11171 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF, 11172 (__v4si) __index, (__v2df) __v1, 11173 __scale); 11174 } 11175 11176 extern __inline void 11177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11178 _mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask, 11179 __m128i __index, __m128d __v1, 11180 const int __scale) 11181 { 11182 
__builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index, 11183 (__v2df) __v1, __scale); 11184 } 11185 11186 extern __inline void 11187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11188 _mm256_i64scatter_ps (void *__addr, __m256i __index, 11189 __m128 __v1, const int __scale) 11190 { 11191 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF, 11192 (__v4di) __index, (__v4sf) __v1, 11193 __scale); 11194 } 11195 11196 extern __inline void 11197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11198 _mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask, 11199 __m256i __index, __m128 __v1, 11200 const int __scale) 11201 { 11202 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index, 11203 (__v4sf) __v1, __scale); 11204 } 11205 11206 extern __inline void 11207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11208 _mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1, 11209 const int __scale) 11210 { 11211 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF, 11212 (__v2di) __index, (__v4sf) __v1, 11213 __scale); 11214 } 11215 11216 extern __inline void 11217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11218 _mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask, 11219 __m128i __index, __m128 __v1, 11220 const int __scale) 11221 { 11222 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index, 11223 (__v4sf) __v1, __scale); 11224 } 11225 11226 extern __inline void 11227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11228 _mm256_i64scatter_pd (void *__addr, __m256i __index, 11229 __m256d __v1, const int __scale) 11230 { 11231 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF, 11232 (__v4di) __index, (__v4df) __v1, 11233 __scale); 11234 } 11235 11236 extern __inline void 11237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11238 _mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask, 11239 __m256i __index, 
__m256d __v1, 11240 const int __scale) 11241 { 11242 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index, 11243 (__v4df) __v1, __scale); 11244 } 11245 11246 extern __inline void 11247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11248 _mm_i64scatter_pd (void *__addr, __m128i __index, 11249 __m128d __v1, const int __scale) 11250 { 11251 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF, 11252 (__v2di) __index, (__v2df) __v1, 11253 __scale); 11254 } 11255 11256 extern __inline void 11257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11258 _mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask, 11259 __m128i __index, __m128d __v1, 11260 const int __scale) 11261 { 11262 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index, 11263 (__v2df) __v1, __scale); 11264 } 11265 11266 extern __inline void 11267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11268 _mm256_i32scatter_epi32 (void *__addr, __m256i __index, 11269 __m256i __v1, const int __scale) 11270 { 11271 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF, 11272 (__v8si) __index, (__v8si) __v1, 11273 __scale); 11274 } 11275 11276 extern __inline void 11277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11278 _mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask, 11279 __m256i __index, __m256i __v1, 11280 const int __scale) 11281 { 11282 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index, 11283 (__v8si) __v1, __scale); 11284 } 11285 11286 extern __inline void 11287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11288 _mm_i32scatter_epi32 (void *__addr, __m128i __index, 11289 __m128i __v1, const int __scale) 11290 { 11291 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF, 11292 (__v4si) __index, (__v4si) __v1, 11293 __scale); 11294 } 11295 11296 extern __inline void 11297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11298 
_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask, 11299 __m128i __index, __m128i __v1, 11300 const int __scale) 11301 { 11302 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index, 11303 (__v4si) __v1, __scale); 11304 } 11305 11306 extern __inline void 11307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11308 _mm256_i32scatter_epi64 (void *__addr, __m128i __index, 11309 __m256i __v1, const int __scale) 11310 { 11311 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF, 11312 (__v4si) __index, (__v4di) __v1, 11313 __scale); 11314 } 11315 11316 extern __inline void 11317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11318 _mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask, 11319 __m128i __index, __m256i __v1, 11320 const int __scale) 11321 { 11322 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index, 11323 (__v4di) __v1, __scale); 11324 } 11325 11326 extern __inline void 11327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11328 _mm_i32scatter_epi64 (void *__addr, __m128i __index, 11329 __m128i __v1, const int __scale) 11330 { 11331 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF, 11332 (__v4si) __index, (__v2di) __v1, 11333 __scale); 11334 } 11335 11336 extern __inline void 11337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11338 _mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask, 11339 __m128i __index, __m128i __v1, 11340 const int __scale) 11341 { 11342 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index, 11343 (__v2di) __v1, __scale); 11344 } 11345 11346 extern __inline void 11347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11348 _mm256_i64scatter_epi32 (void *__addr, __m256i __index, 11349 __m128i __v1, const int __scale) 11350 { 11351 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF, 11352 (__v4di) __index, (__v4si) __v1, 11353 __scale); 11354 } 11355 11356 extern __inline void 11357 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11358 _mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask, 11359 __m256i __index, __m128i __v1, 11360 const int __scale) 11361 { 11362 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index, 11363 (__v4si) __v1, __scale); 11364 } 11365 11366 extern __inline void 11367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11368 _mm_i64scatter_epi32 (void *__addr, __m128i __index, 11369 __m128i __v1, const int __scale) 11370 { 11371 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF, 11372 (__v2di) __index, (__v4si) __v1, 11373 __scale); 11374 } 11375 11376 extern __inline void 11377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11378 _mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask, 11379 __m128i __index, __m128i __v1, 11380 const int __scale) 11381 { 11382 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index, 11383 (__v4si) __v1, __scale); 11384 } 11385 11386 extern __inline void 11387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11388 _mm256_i64scatter_epi64 (void *__addr, __m256i __index, 11389 __m256i __v1, const int __scale) 11390 { 11391 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF, 11392 (__v4di) __index, (__v4di) __v1, 11393 __scale); 11394 } 11395 11396 extern __inline void 11397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11398 _mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask, 11399 __m256i __index, __m256i __v1, 11400 const int __scale) 11401 { 11402 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index, 11403 (__v4di) __v1, __scale); 11404 } 11405 11406 extern __inline void 11407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11408 _mm_i64scatter_epi64 (void *__addr, __m128i __index, 11409 __m128i __v1, const int __scale) 11410 { 11411 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF, 11412 (__v2di) __index, 
(__v2di) __v1, 11413 __scale); 11414 } 11415 11416 extern __inline void 11417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11418 _mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask, 11419 __m128i __index, __m128i __v1, 11420 const int __scale) 11421 { 11422 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index, 11423 (__v2di) __v1, __scale); 11424 } 11425 11426 extern __inline __m256i 11427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11428 _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 11429 _MM_PERM_ENUM __mask) 11430 { 11431 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask, 11432 (__v8si) __W, 11433 (__mmask8) __U); 11434 } 11435 11436 extern __inline __m256i 11437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11438 _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A, 11439 _MM_PERM_ENUM __mask) 11440 { 11441 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask, 11442 (__v8si) 11443 _mm256_setzero_si256 (), 11444 (__mmask8) __U); 11445 } 11446 11447 extern __inline __m128i 11448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11449 _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 11450 _MM_PERM_ENUM __mask) 11451 { 11452 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask, 11453 (__v4si) __W, 11454 (__mmask8) __U); 11455 } 11456 11457 extern __inline __m128i 11458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11459 _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A, 11460 _MM_PERM_ENUM __mask) 11461 { 11462 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask, 11463 (__v4si) 11464 _mm_setzero_si128 (), 11465 (__mmask8) __U); 11466 } 11467 11468 extern __inline __m256i 11469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11470 _mm256_rol_epi32 (__m256i __A, const int __B) 11471 { 11472 return (__m256i) 
__builtin_ia32_prold256_mask ((__v8si) __A, __B, 11473 (__v8si) 11474 _mm256_setzero_si256 (), 11475 (__mmask8) -1); 11476 } 11477 11478 extern __inline __m256i 11479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11480 _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 11481 const int __B) 11482 { 11483 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B, 11484 (__v8si) __W, 11485 (__mmask8) __U); 11486 } 11487 11488 extern __inline __m256i 11489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11490 _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B) 11491 { 11492 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B, 11493 (__v8si) 11494 _mm256_setzero_si256 (), 11495 (__mmask8) __U); 11496 } 11497 11498 extern __inline __m128i 11499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11500 _mm_rol_epi32 (__m128i __A, const int __B) 11501 { 11502 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B, 11503 (__v4si) 11504 _mm_setzero_si128 (), 11505 (__mmask8) -1); 11506 } 11507 11508 extern __inline __m128i 11509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11510 _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 11511 const int __B) 11512 { 11513 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B, 11514 (__v4si) __W, 11515 (__mmask8) __U); 11516 } 11517 11518 extern __inline __m128i 11519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11520 _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B) 11521 { 11522 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B, 11523 (__v4si) 11524 _mm_setzero_si128 (), 11525 (__mmask8) __U); 11526 } 11527 11528 extern __inline __m256i 11529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11530 _mm256_ror_epi32 (__m256i __A, const int __B) 11531 { 11532 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) 
__A, __B, 11533 (__v8si) 11534 _mm256_setzero_si256 (), 11535 (__mmask8) -1); 11536 } 11537 11538 extern __inline __m256i 11539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11540 _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 11541 const int __B) 11542 { 11543 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B, 11544 (__v8si) __W, 11545 (__mmask8) __U); 11546 } 11547 11548 extern __inline __m256i 11549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11550 _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B) 11551 { 11552 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B, 11553 (__v8si) 11554 _mm256_setzero_si256 (), 11555 (__mmask8) __U); 11556 } 11557 11558 extern __inline __m128i 11559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11560 _mm_ror_epi32 (__m128i __A, const int __B) 11561 { 11562 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B, 11563 (__v4si) 11564 _mm_setzero_si128 (), 11565 (__mmask8) -1); 11566 } 11567 11568 extern __inline __m128i 11569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11570 _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 11571 const int __B) 11572 { 11573 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B, 11574 (__v4si) __W, 11575 (__mmask8) __U); 11576 } 11577 11578 extern __inline __m128i 11579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11580 _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B) 11581 { 11582 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B, 11583 (__v4si) 11584 _mm_setzero_si128 (), 11585 (__mmask8) __U); 11586 } 11587 11588 extern __inline __m256i 11589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11590 _mm256_rol_epi64 (__m256i __A, const int __B) 11591 { 11592 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B, 11593 (__v4di) 11594 
_mm256_setzero_si256 (), 11595 (__mmask8) -1); 11596 } 11597 11598 extern __inline __m256i 11599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11600 _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 11601 const int __B) 11602 { 11603 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B, 11604 (__v4di) __W, 11605 (__mmask8) __U); 11606 } 11607 11608 extern __inline __m256i 11609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11610 _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B) 11611 { 11612 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B, 11613 (__v4di) 11614 _mm256_setzero_si256 (), 11615 (__mmask8) __U); 11616 } 11617 11618 extern __inline __m128i 11619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11620 _mm_rol_epi64 (__m128i __A, const int __B) 11621 { 11622 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, 11623 (__v2di) 11624 _mm_setzero_si128 (), 11625 (__mmask8) -1); 11626 } 11627 11628 extern __inline __m128i 11629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11630 _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 11631 const int __B) 11632 { 11633 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, 11634 (__v2di) __W, 11635 (__mmask8) __U); 11636 } 11637 11638 extern __inline __m128i 11639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11640 _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B) 11641 { 11642 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B, 11643 (__v2di) 11644 _mm_setzero_si128 (), 11645 (__mmask8) __U); 11646 } 11647 11648 extern __inline __m256i 11649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11650 _mm256_ror_epi64 (__m256i __A, const int __B) 11651 { 11652 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B, 11653 (__v4di) 11654 _mm256_setzero_si256 (), 11655 
(__mmask8) -1); 11656 } 11657 11658 extern __inline __m256i 11659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11660 _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 11661 const int __B) 11662 { 11663 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B, 11664 (__v4di) __W, 11665 (__mmask8) __U); 11666 } 11667 11668 extern __inline __m256i 11669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11670 _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B) 11671 { 11672 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B, 11673 (__v4di) 11674 _mm256_setzero_si256 (), 11675 (__mmask8) __U); 11676 } 11677 11678 extern __inline __m128i 11679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11680 _mm_ror_epi64 (__m128i __A, const int __B) 11681 { 11682 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, 11683 (__v2di) 11684 _mm_setzero_si128 (), 11685 (__mmask8) -1); 11686 } 11687 11688 extern __inline __m128i 11689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11690 _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 11691 const int __B) 11692 { 11693 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, 11694 (__v2di) __W, 11695 (__mmask8) __U); 11696 } 11697 11698 extern __inline __m128i 11699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11700 _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B) 11701 { 11702 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B, 11703 (__v2di) 11704 _mm_setzero_si128 (), 11705 (__mmask8) __U); 11706 } 11707 11708 extern __inline __m128i 11709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11710 _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm) 11711 { 11712 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A, 11713 (__v4si) __B, __imm, 11714 (__v4si) 11715 _mm_setzero_si128 (), 11716 
(__mmask8) -1); 11717 } 11718 11719 extern __inline __m128i 11720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11721 _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 11722 __m128i __B, const int __imm) 11723 { 11724 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A, 11725 (__v4si) __B, __imm, 11726 (__v4si) __W, 11727 (__mmask8) __U); 11728 } 11729 11730 extern __inline __m128i 11731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11732 _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B, 11733 const int __imm) 11734 { 11735 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A, 11736 (__v4si) __B, __imm, 11737 (__v4si) 11738 _mm_setzero_si128 (), 11739 (__mmask8) __U); 11740 } 11741 11742 extern __inline __m128i 11743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11744 _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm) 11745 { 11746 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, 11747 (__v2di) __B, __imm, 11748 (__v2di) 11749 _mm_setzero_si128 (), 11750 (__mmask8) -1); 11751 } 11752 11753 extern __inline __m128i 11754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11755 _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 11756 __m128i __B, const int __imm) 11757 { 11758 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, 11759 (__v2di) __B, __imm, 11760 (__v2di) __W, 11761 (__mmask8) __U); 11762 } 11763 11764 extern __inline __m128i 11765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11766 _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B, 11767 const int __imm) 11768 { 11769 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A, 11770 (__v2di) __B, __imm, 11771 (__v2di) 11772 _mm_setzero_si128 (), 11773 (__mmask8) __U); 11774 } 11775 11776 extern __inline __m256i 11777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11778 
_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm) 11779 { 11780 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A, 11781 (__v8si) __B, __imm, 11782 (__v8si) 11783 _mm256_setzero_si256 (), 11784 (__mmask8) -1); 11785 } 11786 11787 extern __inline __m256i 11788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11789 _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 11790 __m256i __B, const int __imm) 11791 { 11792 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A, 11793 (__v8si) __B, __imm, 11794 (__v8si) __W, 11795 (__mmask8) __U); 11796 } 11797 11798 extern __inline __m256i 11799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11800 _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B, 11801 const int __imm) 11802 { 11803 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A, 11804 (__v8si) __B, __imm, 11805 (__v8si) 11806 _mm256_setzero_si256 (), 11807 (__mmask8) __U); 11808 } 11809 11810 extern __inline __m256i 11811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11812 _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm) 11813 { 11814 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A, 11815 (__v4di) __B, __imm, 11816 (__v4di) 11817 _mm256_setzero_si256 (), 11818 (__mmask8) -1); 11819 } 11820 11821 extern __inline __m256i 11822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11823 _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 11824 __m256i __B, const int __imm) 11825 { 11826 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A, 11827 (__v4di) __B, __imm, 11828 (__v4di) __W, 11829 (__mmask8) __U); 11830 } 11831 11832 extern __inline __m256i 11833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11834 _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B, 11835 const int __imm) 11836 { 11837 return (__m256i) __builtin_ia32_alignq256_mask 
((__v4di) __A, 11838 (__v4di) __B, __imm, 11839 (__v4di) 11840 _mm256_setzero_si256 (), 11841 (__mmask8) __U); 11842 } 11843 11844 extern __inline __m128i 11845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11846 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A, 11847 const int __I) 11848 { 11849 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I, 11850 (__v8hi) __W, 11851 (__mmask8) __U); 11852 } 11853 11854 extern __inline __m128i 11855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11856 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I) 11857 { 11858 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I, 11859 (__v8hi) 11860 _mm_setzero_si128 (), 11861 (__mmask8) __U); 11862 } 11863 11864 extern __inline __m128i 11865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11866 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A, 11867 const int __I) 11868 { 11869 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I, 11870 (__v8hi) __W, 11871 (__mmask8) __U); 11872 } 11873 11874 extern __inline __m128i 11875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11876 _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I) 11877 { 11878 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I, 11879 (__v8hi) 11880 _mm_setzero_si128 (), 11881 (__mmask8) __U); 11882 } 11883 11884 extern __inline __m256i 11885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11886 _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 11887 const int __imm) 11888 { 11889 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm, 11890 (__v8si) __W, 11891 (__mmask8) __U); 11892 } 11893 11894 extern __inline __m256i 11895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11896 _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm) 11897 { 11898 return 
(__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm, 11899 (__v8si) 11900 _mm256_setzero_si256 (), 11901 (__mmask8) __U); 11902 } 11903 11904 extern __inline __m128i 11905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11906 _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 11907 const int __imm) 11908 { 11909 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm, 11910 (__v4si) __W, 11911 (__mmask8) __U); 11912 } 11913 11914 extern __inline __m128i 11915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11916 _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm) 11917 { 11918 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm, 11919 (__v4si) 11920 _mm_setzero_si128 (), 11921 (__mmask8) __U); 11922 } 11923 11924 extern __inline __m256i 11925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11926 _mm256_srai_epi64 (__m256i __A, const int __imm) 11927 { 11928 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, 11929 (__v4di) 11930 _mm256_setzero_si256 (), 11931 (__mmask8) -1); 11932 } 11933 11934 extern __inline __m256i 11935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11936 _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 11937 const int __imm) 11938 { 11939 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, 11940 (__v4di) __W, 11941 (__mmask8) __U); 11942 } 11943 11944 extern __inline __m256i 11945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11946 _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm) 11947 { 11948 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm, 11949 (__v4di) 11950 _mm256_setzero_si256 (), 11951 (__mmask8) __U); 11952 } 11953 11954 extern __inline __m128i 11955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11956 _mm_srai_epi64 (__m128i __A, const int __imm) 11957 { 11958 
return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, 11959 (__v2di) 11960 _mm_setzero_si128 (), 11961 (__mmask8) -1); 11962 } 11963 11964 extern __inline __m128i 11965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11966 _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 11967 const int __imm) 11968 { 11969 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, 11970 (__v2di) __W, 11971 (__mmask8) __U); 11972 } 11973 11974 extern __inline __m128i 11975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11976 _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm) 11977 { 11978 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm, 11979 (__v2di) 11980 _mm_setzero_si128 (), 11981 (__mmask8) __U); 11982 } 11983 11984 extern __inline __m128i 11985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11986 _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B) 11987 { 11988 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B, 11989 (__v4si) __W, 11990 (__mmask8) __U); 11991 } 11992 11993 extern __inline __m128i 11994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11995 _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B) 11996 { 11997 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B, 11998 (__v4si) 11999 _mm_setzero_si128 (), 12000 (__mmask8) __U); 12001 } 12002 12003 extern __inline __m128i 12004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12005 _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B) 12006 { 12007 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B, 12008 (__v2di) __W, 12009 (__mmask8) __U); 12010 } 12011 12012 extern __inline __m128i 12013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12014 _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B) 12015 { 12016 return (__m128i) 
__builtin_ia32_psllqi128_mask ((__v2di) __A, __B, 12017 (__v2di) 12018 _mm_setzero_si128 (), 12019 (__mmask8) __U); 12020 } 12021 12022 extern __inline __m256i 12023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12024 _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 12025 int __B) 12026 { 12027 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B, 12028 (__v8si) __W, 12029 (__mmask8) __U); 12030 } 12031 12032 extern __inline __m256i 12033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12034 _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B) 12035 { 12036 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B, 12037 (__v8si) 12038 _mm256_setzero_si256 (), 12039 (__mmask8) __U); 12040 } 12041 12042 extern __inline __m256i 12043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12044 _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 12045 int __B) 12046 { 12047 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B, 12048 (__v4di) __W, 12049 (__mmask8) __U); 12050 } 12051 12052 extern __inline __m256i 12053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12054 _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B) 12055 { 12056 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B, 12057 (__v4di) 12058 _mm256_setzero_si256 (), 12059 (__mmask8) __U); 12060 } 12061 12062 extern __inline __m256d 12063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12064 _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X, 12065 const int __imm) 12066 { 12067 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm, 12068 (__v4df) __W, 12069 (__mmask8) __U); 12070 } 12071 12072 extern __inline __m256d 12073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12074 _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm) 12075 { 12076 return 
(__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm, 12077 (__v4df) 12078 _mm256_setzero_pd (), 12079 (__mmask8) __U); 12080 } 12081 12082 extern __inline __m256d 12083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12084 _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X, 12085 const int __C) 12086 { 12087 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C, 12088 (__v4df) __W, 12089 (__mmask8) __U); 12090 } 12091 12092 extern __inline __m256d 12093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12094 _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C) 12095 { 12096 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C, 12097 (__v4df) 12098 _mm256_setzero_pd (), 12099 (__mmask8) __U); 12100 } 12101 12102 extern __inline __m128d 12103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12104 _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X, 12105 const int __C) 12106 { 12107 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C, 12108 (__v2df) __W, 12109 (__mmask8) __U); 12110 } 12111 12112 extern __inline __m128d 12113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12114 _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C) 12115 { 12116 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C, 12117 (__v2df) 12118 _mm_setzero_pd (), 12119 (__mmask8) __U); 12120 } 12121 12122 extern __inline __m256 12123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12124 _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X, 12125 const int __C) 12126 { 12127 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C, 12128 (__v8sf) __W, 12129 (__mmask8) __U); 12130 } 12131 12132 extern __inline __m256 12133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12134 _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C) 12135 { 
12136 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C, 12137 (__v8sf) 12138 _mm256_setzero_ps (), 12139 (__mmask8) __U); 12140 } 12141 12142 extern __inline __m128 12143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12144 _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X, 12145 const int __C) 12146 { 12147 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C, 12148 (__v4sf) __W, 12149 (__mmask8) __U); 12150 } 12151 12152 extern __inline __m128 12153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12154 _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C) 12155 { 12156 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C, 12157 (__v4sf) 12158 _mm_setzero_ps (), 12159 (__mmask8) __U); 12160 } 12161 12162 extern __inline __m256d 12163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12164 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) 12165 { 12166 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A, 12167 (__v4df) __W, 12168 (__mmask8) __U); 12169 } 12170 12171 extern __inline __m256 12172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12173 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) 12174 { 12175 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A, 12176 (__v8sf) __W, 12177 (__mmask8) __U); 12178 } 12179 12180 extern __inline __m256i 12181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12182 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) 12183 { 12184 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A, 12185 (__v4di) __W, 12186 (__mmask8) __U); 12187 } 12188 12189 extern __inline __m256i 12190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12191 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) 12192 { 12193 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A, 12194 (__v8si) 
__W, 12195 (__mmask8) __U); 12196 } 12197 12198 extern __inline __m128d 12199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12200 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) 12201 { 12202 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A, 12203 (__v2df) __W, 12204 (__mmask8) __U); 12205 } 12206 12207 extern __inline __m128 12208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12209 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) 12210 { 12211 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A, 12212 (__v4sf) __W, 12213 (__mmask8) __U); 12214 } 12215 12216 extern __inline __m128i 12217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12218 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) 12219 { 12220 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A, 12221 (__v2di) __W, 12222 (__mmask8) __U); 12223 } 12224 12225 extern __inline __m128i 12226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12227 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) 12228 { 12229 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A, 12230 (__v4si) __W, 12231 (__mmask8) __U); 12232 } 12233 12234 extern __inline __mmask8 12235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12236 _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P) 12237 { 12238 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, 12239 (__v4di) __Y, __P, 12240 (__mmask8) -1); 12241 } 12242 12243 extern __inline __mmask8 12244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12245 _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P) 12246 { 12247 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 12248 (__v8si) __Y, __P, 12249 (__mmask8) -1); 12250 } 12251 12252 extern __inline __mmask8 12253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12254 
_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P) 12255 { 12256 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 12257 (__v4di) __Y, __P, 12258 (__mmask8) -1); 12259 } 12260 12261 extern __inline __mmask8 12262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12263 _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P) 12264 { 12265 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 12266 (__v8si) __Y, __P, 12267 (__mmask8) -1); 12268 } 12269 12270 extern __inline __mmask8 12271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12272 _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P) 12273 { 12274 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X, 12275 (__v4df) __Y, __P, 12276 (__mmask8) -1); 12277 } 12278 12279 extern __inline __mmask8 12280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12281 _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P) 12282 { 12283 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X, 12284 (__v8sf) __Y, __P, 12285 (__mmask8) -1); 12286 } 12287 12288 extern __inline __mmask8 12289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12290 _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y, 12291 const int __P) 12292 { 12293 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, 12294 (__v4di) __Y, __P, 12295 (__mmask8) __U); 12296 } 12297 12298 extern __inline __mmask8 12299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12300 _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y, 12301 const int __P) 12302 { 12303 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, 12304 (__v8si) __Y, __P, 12305 (__mmask8) __U); 12306 } 12307 12308 extern __inline __mmask8 12309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12310 _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y, 12311 const int 
__P) 12312 { 12313 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, 12314 (__v4di) __Y, __P, 12315 (__mmask8) __U); 12316 } 12317 12318 extern __inline __mmask8 12319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12320 _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y, 12321 const int __P) 12322 { 12323 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, 12324 (__v8si) __Y, __P, 12325 (__mmask8) __U); 12326 } 12327 12328 extern __inline __mmask8 12329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12330 _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y, 12331 const int __P) 12332 { 12333 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X, 12334 (__v4df) __Y, __P, 12335 (__mmask8) __U); 12336 } 12337 12338 extern __inline __mmask8 12339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12340 _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y, 12341 const int __P) 12342 { 12343 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X, 12344 (__v8sf) __Y, __P, 12345 (__mmask8) __U); 12346 } 12347 12348 extern __inline __mmask8 12349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12350 _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P) 12351 { 12352 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, 12353 (__v2di) __Y, __P, 12354 (__mmask8) -1); 12355 } 12356 12357 extern __inline __mmask8 12358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12359 _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P) 12360 { 12361 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 12362 (__v4si) __Y, __P, 12363 (__mmask8) -1); 12364 } 12365 12366 extern __inline __mmask8 12367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12368 _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P) 12369 { 12370 return (__mmask8) __builtin_ia32_ucmpq128_mask 
((__v2di) __X, 12371 (__v2di) __Y, __P, 12372 (__mmask8) -1); 12373 } 12374 12375 extern __inline __mmask8 12376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12377 _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P) 12378 { 12379 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 12380 (__v4si) __Y, __P, 12381 (__mmask8) -1); 12382 } 12383 12384 extern __inline __mmask8 12385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12386 _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P) 12387 { 12388 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X, 12389 (__v2df) __Y, __P, 12390 (__mmask8) -1); 12391 } 12392 12393 extern __inline __mmask8 12394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12395 _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P) 12396 { 12397 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X, 12398 (__v4sf) __Y, __P, 12399 (__mmask8) -1); 12400 } 12401 12402 extern __inline __mmask8 12403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12404 _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y, 12405 const int __P) 12406 { 12407 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, 12408 (__v2di) __Y, __P, 12409 (__mmask8) __U); 12410 } 12411 12412 extern __inline __mmask8 12413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12414 _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y, 12415 const int __P) 12416 { 12417 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, 12418 (__v4si) __Y, __P, 12419 (__mmask8) __U); 12420 } 12421 12422 extern __inline __mmask8 12423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12424 _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y, 12425 const int __P) 12426 { 12427 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, 12428 (__v2di) __Y, __P, 12429 (__mmask8) __U); 12430 } 12431 
12432 extern __inline __mmask8 12433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12434 _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y, 12435 const int __P) 12436 { 12437 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, 12438 (__v4si) __Y, __P, 12439 (__mmask8) __U); 12440 } 12441 12442 extern __inline __mmask8 12443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12444 _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y, 12445 const int __P) 12446 { 12447 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X, 12448 (__v2df) __Y, __P, 12449 (__mmask8) __U); 12450 } 12451 12452 extern __inline __mmask8 12453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12454 _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y, 12455 const int __P) 12456 { 12457 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X, 12458 (__v4sf) __Y, __P, 12459 (__mmask8) __U); 12460 } 12461 12462 extern __inline __m256d 12463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12464 _mm256_permutex_pd (__m256d __X, const int __M) 12465 { 12466 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M, 12467 (__v4df) 12468 _mm256_undefined_pd (), 12469 (__mmask8) -1); 12470 } 12471 12472 #else 12473 #define _mm256_permutex_pd(X, M) \ 12474 ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \ 12475 (__v4df)(__m256d) \ 12476 _mm256_undefined_pd (), \ 12477 (__mmask8)-1)) 12478 12479 #define _mm256_permutex_epi64(X, I) \ 12480 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \ 12481 (int)(I), \ 12482 (__v4di)(__m256i) \ 12483 (_mm256_setzero_si256 ()),\ 12484 (__mmask8) -1)) 12485 12486 #define _mm256_maskz_permutex_epi64(M, X, I) \ 12487 ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \ 12488 (int)(I), \ 12489 (__v4di)(__m256i) \ 12490 (_mm256_setzero_si256 ()),\ 12491 (__mmask8)(M))) 12492 12493 
/* Macro wrappers whose selector/index argument is cast to int and
   handed straight to the builtin.  Every macro argument is
   parenthesised and cast to the vector or mask type the builtin
   takes.  */

#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask \
     ((__v4di)(__m256i)(X), (int)(I), \
      (__v4di)(__m256i)(W), (__mmask8)(M)))

/* 128-bit lane insertion into a 256-bit vector.  */

#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask \
     ((__v8sf)(__m256) (X), (__v4sf)(__m128) (Y), (int) (C), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)-1))

#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask \
     ((__v8sf)(__m256) (X), (__v4sf)(__m128) (Y), (int) (C), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask \
     ((__v8sf)(__m256) (X), (__v4sf)(__m128) (Y), (int) (C), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)(U)))

#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask \
     ((__v8si)(__m256i) (X), (__v4si)(__m128i) (Y), (int) (C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask \
     ((__v8si)(__m256i) (X), (__v4si)(__m128i) (Y), (int) (C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_inserti32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask \
     ((__v8si)(__m256i) (X), (__v4si)(__m128i) (Y), (int) (C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

/* 128-bit lane extraction from a 256-bit vector.  */

#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask \
     ((__v8sf)(__m256) (X), (int) (C), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)-1))

#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask \
     ((__v8sf)(__m256) (X), (int) (C), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask \
     ((__v8sf)(__m256) (X), (int) (C), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)(U)))

#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask \
     ((__v8si)(__m256i) (X), (int) (C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask \
     ((__v8si)(__m256i) (X), (int) (C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask \
     ((__v8si)(__m256i) (X), (int) (C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

/* Two-source shuffles built on the shuf_{i64x2,i32x4,f64x2,f32x4}
   builtins.  */

#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask \
     ((__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask \
     ((__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(C), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask \
     ((__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask \
     ((__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask \
     ((__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask \
     ((__v8si)(__m256i)(X), (__v8si)(__m256i)(Y), (int)(C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask \
     ((__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), (int)(C), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)-1))

#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask \
     ((__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), (int)(C), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask \
     ((__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), (int)(C), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))

#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask \
     ((__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(C), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)-1))

#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask \
     ((__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(C), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask \
     ((__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(C), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)(U)))

#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask \
     ((__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

12645 #define _mm256_maskz_shuffle_pd(U, A, B, C) \ 12646 ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \ 12647 (__v4df)(__m256d)(B), (int)(C), \ 12648 (__v4df)(__m256d) \ 12649 _mm256_setzero_pd (), \ 12650 (__mmask8)(U))) 12651 12652 #define _mm_mask_shuffle_pd(W, U, A, B, C) \ 12653 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \ 12654 (__v2df)(__m128d)(B), (int)(C), \ 12655 (__v2df)(__m128d)(W), \ 12656 (__mmask8)(U))) 12657 12658 #define _mm_maskz_shuffle_pd(U, A, B, C) \ 12659 ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \ 12660 (__v2df)(__m128d)(B), (int)(C), \ 12661 (__v2df)(__m128d)_mm_setzero_pd (), \ 12662 (__mmask8)(U))) 12663 12664 #define _mm256_mask_shuffle_ps(W, U, A, B, C) \ 12665 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \ 12666 (__v8sf)(__m256)(B), (int)(C), \ 12667 (__v8sf)(__m256)(W), \ 12668 (__mmask8)(U))) 12669 12670 #define _mm256_maskz_shuffle_ps(U, A, B, C) \ 12671 ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \ 12672 (__v8sf)(__m256)(B), (int)(C), \ 12673 (__v8sf)(__m256)_mm256_setzero_ps (),\ 12674 (__mmask8)(U))) 12675 12676 #define _mm_mask_shuffle_ps(W, U, A, B, C) \ 12677 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \ 12678 (__v4sf)(__m128)(B), (int)(C), \ 12679 (__v4sf)(__m128)(W), \ 12680 (__mmask8)(U))) 12681 12682 #define _mm_maskz_shuffle_ps(U, A, B, C) \ 12683 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \ 12684 (__v4sf)(__m128)(B), (int)(C), \ 12685 (__v4sf)(__m128)_mm_setzero_ps (), \ 12686 (__mmask8)(U))) 12687 12688 #define _mm256_fixupimm_pd(X, Y, Z, C) \ 12689 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \ 12690 (__v4df)(__m256d)(Y), \ 12691 (__v4di)(__m256i)(Z), (int)(C), \ 12692 (__mmask8)(-1))) 12693 12694 #define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \ 12695 ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \ 12696 (__v4df)(__m256d)(Y), \ 12697 
(__v4di)(__m256i)(Z), (int)(C), \ 12698 (__mmask8)(U))) 12699 12700 #define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \ 12701 ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X), \ 12702 (__v4df)(__m256d)(Y), \ 12703 (__v4di)(__m256i)(Z), (int)(C),\ 12704 (__mmask8)(U))) 12705 12706 #define _mm256_fixupimm_ps(X, Y, Z, C) \ 12707 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \ 12708 (__v8sf)(__m256)(Y), \ 12709 (__v8si)(__m256i)(Z), (int)(C), \ 12710 (__mmask8)(-1))) 12711 12712 12713 #define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \ 12714 ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \ 12715 (__v8sf)(__m256)(Y), \ 12716 (__v8si)(__m256i)(Z), (int)(C), \ 12717 (__mmask8)(U))) 12718 12719 #define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \ 12720 ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \ 12721 (__v8sf)(__m256)(Y), \ 12722 (__v8si)(__m256i)(Z), (int)(C),\ 12723 (__mmask8)(U))) 12724 12725 #define _mm_fixupimm_pd(X, Y, Z, C) \ 12726 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \ 12727 (__v2df)(__m128d)(Y), \ 12728 (__v2di)(__m128i)(Z), (int)(C), \ 12729 (__mmask8)(-1))) 12730 12731 12732 #define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \ 12733 ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \ 12734 (__v2df)(__m128d)(Y), \ 12735 (__v2di)(__m128i)(Z), (int)(C), \ 12736 (__mmask8)(U))) 12737 12738 #define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \ 12739 ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X), \ 12740 (__v2df)(__m128d)(Y), \ 12741 (__v2di)(__m128i)(Z), (int)(C),\ 12742 (__mmask8)(U))) 12743 12744 #define _mm_fixupimm_ps(X, Y, Z, C) \ 12745 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \ 12746 (__v4sf)(__m128)(Y), \ 12747 (__v4si)(__m128i)(Z), (int)(C), \ 12748 (__mmask8)(-1))) 12749 12750 #define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \ 12751 ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \ 12752 
(__v4sf)(__m128)(Y), \ 12753 (__v4si)(__m128i)(Z), (int)(C),\ 12754 (__mmask8)(U))) 12755 12756 #define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \ 12757 ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X), \ 12758 (__v4sf)(__m128)(Y), \ 12759 (__v4si)(__m128i)(Z), (int)(C),\ 12760 (__mmask8)(U))) 12761 12762 #define _mm256_mask_srli_epi32(W, U, A, B) \ 12763 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \ 12764 (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U))) 12765 12766 #define _mm256_maskz_srli_epi32(U, A, B) \ 12767 ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \ 12768 (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U))) 12769 12770 #define _mm_mask_srli_epi32(W, U, A, B) \ 12771 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \ 12772 (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U))) 12773 12774 #define _mm_maskz_srli_epi32(U, A, B) \ 12775 ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \ 12776 (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U))) 12777 12778 #define _mm256_mask_srli_epi64(W, U, A, B) \ 12779 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \ 12780 (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U))) 12781 12782 #define _mm256_maskz_srli_epi64(U, A, B) \ 12783 ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \ 12784 (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U))) 12785 12786 #define _mm_mask_srli_epi64(W, U, A, B) \ 12787 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \ 12788 (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U))) 12789 12790 #define _mm_maskz_srli_epi64(U, A, B) \ 12791 ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \ 12792 (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U))) 12793 12794 #define _mm256_mask_slli_epi32(W, U, X, C) \ 12795 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\ 12796 (__v8si)(__m256i)(W), \ 12797 (__mmask8)(U))) 12798 12799 #define 
_mm256_maskz_slli_epi32(U, X, C) \ 12800 ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\ 12801 (__v8si)(__m256i)_mm256_setzero_si256 (), \ 12802 (__mmask8)(U))) 12803 12804 #define _mm256_mask_slli_epi64(W, U, X, C) \ 12805 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\ 12806 (__v4di)(__m256i)(W), \ 12807 (__mmask8)(U))) 12808 12809 #define _mm256_maskz_slli_epi64(U, X, C) \ 12810 ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\ 12811 (__v4di)(__m256i)_mm256_setzero_si256 (), \ 12812 (__mmask8)(U))) 12813 12814 #define _mm_mask_slli_epi32(W, U, X, C) \ 12815 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\ 12816 (__v4si)(__m128i)(W),\ 12817 (__mmask8)(U))) 12818 12819 #define _mm_maskz_slli_epi32(U, X, C) \ 12820 ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\ 12821 (__v4si)(__m128i)_mm_setzero_si128 (),\ 12822 (__mmask8)(U))) 12823 12824 #define _mm_mask_slli_epi64(W, U, X, C) \ 12825 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\ 12826 (__v2di)(__m128i)(W),\ 12827 (__mmask8)(U))) 12828 12829 #define _mm_maskz_slli_epi64(U, X, C) \ 12830 ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\ 12831 (__v2di)(__m128i)_mm_setzero_si128 (),\ 12832 (__mmask8)(U))) 12833 12834 #define _mm256_ternarylogic_epi64(A, B, C, I) \ 12835 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \ 12836 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1)) 12837 12838 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \ 12839 ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \ 12840 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U))) 12841 12842 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \ 12843 ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), \ 12844 (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), 
(__mmask8)(U)))

/* pternlogd/pternlogq wrappers.  Plain and _mask_ forms share the _mask
   builtin (the plain form with (__mmask8)-1); _maskz_ forms call the
   separate _maskz builtin.  */
#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
    (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
    (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), \
    (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))

#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
    (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
    (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), \
    (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
    (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
    (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), \
    (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm256_roundscale_ps(A, B) \
12883 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \ 12884 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1)) 12885 12886 #define _mm256_mask_roundscale_ps(W, U, A, B) \ 12887 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \ 12888 (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U))) 12889 12890 #define _mm256_maskz_roundscale_ps(U, A, B) \ 12891 ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \ 12892 (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U))) 12893 12894 #define _mm256_roundscale_pd(A, B) \ 12895 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \ 12896 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1)) 12897 12898 #define _mm256_mask_roundscale_pd(W, U, A, B) \ 12899 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \ 12900 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U))) 12901 12902 #define _mm256_maskz_roundscale_pd(U, A, B) \ 12903 ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \ 12904 (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U))) 12905 12906 #define _mm_roundscale_ps(A, B) \ 12907 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \ 12908 (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1)) 12909 12910 #define _mm_mask_roundscale_ps(W, U, A, B) \ 12911 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \ 12912 (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U))) 12913 12914 #define _mm_maskz_roundscale_ps(U, A, B) \ 12915 ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \ 12916 (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U))) 12917 12918 #define _mm_roundscale_pd(A, B) \ 12919 ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \ 12920 (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1)) 12921 12922 #define _mm_mask_roundscale_pd(W, U, A, B) \ 12923 ((__m128d) __builtin_ia32_rndscalepd_128_mask 
((__v2df)(__m128d)(A), \
    (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
    (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))

/* getmant wrappers.  The two selector arguments are packed into one
   immediate for the builtin as ((C) << 2) | (B).  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
    (int)(((C)<<2) | (B)), \
    (__v8sf)(__m256)_mm256_setzero_ps (), \
    (__mmask8)-1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
    (int)(((C)<<2) | (B)), \
    (__v8sf)(__m256)(W), \
    (__mmask8)(U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
    (int)(((C)<<2) | (B)), \
    (__v8sf)(__m256)_mm256_setzero_ps (), \
    (__mmask8)(U)))

#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
    (int)(((C)<<2) | (B)), \
    (__v4sf)(__m128)_mm_setzero_ps (), \
    (__mmask8)-1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
    (int)(((C)<<2) | (B)), \
    (__v4sf)(__m128)(W), \
    (__mmask8)(U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
    (int)(((C)<<2) | (B)), \
    (__v4sf)(__m128)_mm_setzero_ps (), \
    (__mmask8)(U)))

#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
    (int)(((C)<<2) | (B)), \
    (__v4df)(__m256d)_mm256_setzero_pd (), \
    (__mmask8)-1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
    (int)(((C)<<2) | (B)), \
(__v4df)(__m256d)(W), \ 12976 (__mmask8)(U))) 12977 12978 #define _mm256_maskz_getmant_pd(U, X, B, C) \ 12979 ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \ 12980 (int)(((C)<<2) | (B)), \ 12981 (__v4df)(__m256d)_mm256_setzero_pd (),\ 12982 (__mmask8)(U))) 12983 12984 #define _mm_getmant_pd(X, B, C) \ 12985 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \ 12986 (int)(((C)<<2) | (B)), \ 12987 (__v2df)(__m128d)_mm_setzero_pd (), \ 12988 (__mmask8)-1)) 12989 12990 #define _mm_mask_getmant_pd(W, U, X, B, C) \ 12991 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \ 12992 (int)(((C)<<2) | (B)), \ 12993 (__v2df)(__m128d)(W), \ 12994 (__mmask8)(U))) 12995 12996 #define _mm_maskz_getmant_pd(U, X, B, C) \ 12997 ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \ 12998 (int)(((C)<<2) | (B)), \ 12999 (__v2df)(__m128d)_mm_setzero_pd (), \ 13000 (__mmask8)(U))) 13001 13002 #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ 13003 (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256) (V1OLD), \ 13004 (void const *) (ADDR), \ 13005 (__v8si)(__m256i) (INDEX), \ 13006 (__mmask8) (MASK), \ 13007 (int) (SCALE)) 13008 13009 #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ 13010 (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128) (V1OLD), \ 13011 (void const *) (ADDR), \ 13012 (__v4si)(__m128i) (INDEX), \ 13013 (__mmask8) (MASK), \ 13014 (int) (SCALE)) 13015 13016 #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ 13017 (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d) (V1OLD), \ 13018 (void const *) (ADDR), \ 13019 (__v4si)(__m128i) (INDEX), \ 13020 (__mmask8) (MASK), \ 13021 (int) (SCALE)) 13022 13023 #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ 13024 (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d) (V1OLD), \ 13025 (void const *) (ADDR), \ 13026 (__v4si)(__m128i) (INDEX), \ 13027 (__mmask8) (MASK), \ 13028 
(int) (SCALE))

/* More gather3* wrappers.  The 256-bit i64gather_ps form takes and
   yields 128-bit float vectors: V1OLD is cast to __v4sf and the result
   to __m128, while INDEX is cast to __v4di.  */
#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD), \
    (void const *) (ADDR), \
    (__v4di)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD), \
    (void const *) (ADDR), \
    (__v2di)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD), \
    (void const *) (ADDR), \
    (__v4di)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD), \
    (void const *) (ADDR), \
    (__v2di)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD), \
    (void const *) (ADDR), \
    (__v8si)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD), \
    (void const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD), \
    (void const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m128i) \
__builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD), \
    (void const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

/* Last of the gather3* wrappers, then the first scatter wrappers.  The
   scatter builtins take (ADDR, mask, INDEX, V1, SCALE); the unmasked
   scatter forms pass (__mmask8)0xFF as the mask.  */
#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD), \
    (void const *) (ADDR), \
    (__v4di)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD), \
    (void const *) (ADDR), \
    (__v2di)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD), \
    (void const *) (ADDR), \
    (__v4di)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD), \
    (void const *) (ADDR), \
    (__v2di)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE))

#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8)0xFF, \
    (__v8si)(__m256i) (INDEX), \
    (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) (MASK), \
    (__v8si)(__m256i) (INDEX), \
    (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8)0xFF, \
    (__v4si)(__m128i) (INDEX), \
    (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
13130 __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) (MASK), \ 13131 (__v4si)(__m128i) (INDEX), \ 13132 (__v4sf)(__m128) (V1), (int) (SCALE)) 13133 13134 #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ 13135 __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8)0xFF, \ 13136 (__v4si)(__m128i) (INDEX), \ 13137 (__v4df)(__m256d) (V1), (int) (SCALE)) 13138 13139 #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 13140 __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) (MASK), \ 13141 (__v4si)(__m128i) (INDEX), \ 13142 (__v4df)(__m256d) (V1), (int) (SCALE)) 13143 13144 #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ 13145 __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8)0xFF, \ 13146 (__v4si)(__m128i) (INDEX), \ 13147 (__v2df)(__m128d) (V1), (int) (SCALE)) 13148 13149 #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 13150 __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) (MASK), \ 13151 (__v4si)(__m128i) (INDEX), \ 13152 (__v2df)(__m128d) (V1), (int) (SCALE)) 13153 13154 #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ 13155 __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8)0xFF, \ 13156 (__v4di)(__m256i) (INDEX), \ 13157 (__v4sf)(__m128) (V1), (int) (SCALE)) 13158 13159 #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ 13160 __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) (MASK), \ 13161 (__v4di)(__m256i) (INDEX), \ 13162 (__v4sf)(__m128) (V1), (int) (SCALE)) 13163 13164 #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ 13165 __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8)0xFF, \ 13166 (__v2di)(__m128i) (INDEX), \ 13167 (__v4sf)(__m128) (V1), (int) (SCALE)) 13168 13169 #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ 13170 __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) (MASK), \ 13171 (__v2di)(__m128i) (INDEX), \ 13172 (__v4sf)(__m128) (V1), (int) (SCALE)) 13173 13174 #define _mm256_i64scatter_pd(ADDR, INDEX, V1, 
SCALE) \ 13175 __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8)0xFF, \ 13176 (__v4di)(__m256i) (INDEX), \ 13177 (__v4df)(__m256d) (V1), (int) (SCALE)) 13178 13179 #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 13180 __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) (MASK), \ 13181 (__v4di)(__m256i) (INDEX), \ 13182 (__v4df)(__m256d) (V1), (int) (SCALE)) 13183 13184 #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \ 13185 __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8)0xFF, \ 13186 (__v2di)(__m128i) (INDEX), \ 13187 (__v2df)(__m128d) (V1), (int) (SCALE)) 13188 13189 #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 13190 __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) (MASK), \ 13191 (__v2di)(__m128i) (INDEX), \ 13192 (__v2df)(__m128d) (V1), (int) (SCALE)) 13193 13194 #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ 13195 __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8)0xFF, \ 13196 (__v8si)(__m256i) (INDEX), \ 13197 (__v8si)(__m256i) (V1), (int) (SCALE)) 13198 13199 #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 13200 __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) (MASK), \ 13201 (__v8si)(__m256i) (INDEX), \ 13202 (__v8si)(__m256i) (V1), (int) (SCALE)) 13203 13204 #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ 13205 __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8)0xFF, \ 13206 (__v4si)(__m128i) (INDEX), \ 13207 (__v4si)(__m128i) (V1), (int) (SCALE)) 13208 13209 #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 13210 __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) (MASK), \ 13211 (__v4si)(__m128i) (INDEX), \ 13212 (__v4si)(__m128i) (V1), (int) (SCALE)) 13213 13214 #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ 13215 __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8)0xFF, \ 13216 (__v4si)(__m128i) (INDEX), \ 13217 (__v4di)(__m256i) (V1), (int) (SCALE)) 13218 13219 #define 
_mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 13220 __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) (MASK), \ 13221 (__v4si)(__m128i) (INDEX), \ 13222 (__v4di)(__m256i) (V1), (int) (SCALE)) 13223 13224 #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ 13225 __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8)0xFF, \ 13226 (__v4si)(__m128i) (INDEX), \ 13227 (__v2di)(__m128i) (V1), (int) (SCALE)) 13228 13229 #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 13230 __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) (MASK), \ 13231 (__v4si)(__m128i) (INDEX), \ 13232 (__v2di)(__m128i) (V1), (int) (SCALE)) 13233 13234 #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ 13235 __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8)0xFF, \ 13236 (__v4di)(__m256i) (INDEX), \ 13237 (__v4si)(__m128i) (V1), (int) (SCALE)) 13238 13239 #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 13240 __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (MASK), \ 13241 (__v4di)(__m256i) (INDEX), \ 13242 (__v4si)(__m128i) (V1), (int) (SCALE)) 13243 13244 #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ 13245 __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8)0xFF, \ 13246 (__v2di)(__m128i) (INDEX), \ 13247 (__v4si)(__m128i) (V1), (int) (SCALE)) 13248 13249 #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 13250 __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (MASK), \ 13251 (__v2di)(__m128i) (INDEX), \ 13252 (__v4si)(__m128i) (V1), (int) (SCALE)) 13253 13254 #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ 13255 __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8)0xFF, \ 13256 (__v4di)(__m256i) (INDEX), \ 13257 (__v4di)(__m256i) (V1), (int) (SCALE)) 13258 13259 #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 13260 __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (MASK), \ 13261 (__v4di)(__m256i) (INDEX), \ 13262 
(__v4di)(__m256i) (V1), (int) (SCALE)) 13263 13264 #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ 13265 __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8)0xFF, \ 13266 (__v2di)(__m128i) (INDEX), \ 13267 (__v2di)(__m128i) (V1), (int) (SCALE)) 13268 13269 #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 13270 __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (MASK), \ 13271 (__v2di)(__m128i) (INDEX), \ 13272 (__v2di)(__m128i) (V1), (int) (SCALE)) 13273 13274 #define _mm256_mask_shuffle_epi32(W, U, X, C) \ 13275 ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \ 13276 (__v8si)(__m256i)(W), \ 13277 (__mmask8)(U))) 13278 13279 #define _mm256_maskz_shuffle_epi32(U, X, C) \ 13280 ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \ 13281 (__v8si)(__m256i) \ 13282 _mm256_setzero_si256 (), \ 13283 (__mmask8)(U))) 13284 13285 #define _mm_mask_shuffle_epi32(W, U, X, C) \ 13286 ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \ 13287 (__v4si)(__m128i)(W), \ 13288 (__mmask8)(U))) 13289 13290 #define _mm_maskz_shuffle_epi32(U, X, C) \ 13291 ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \ 13292 (__v4si)(__m128i)_mm_setzero_si128 (), \ 13293 (__mmask8)(U))) 13294 13295 #define _mm256_rol_epi64(A, B) \ 13296 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \ 13297 (__v4di)(__m256i)_mm256_setzero_si256 (),\ 13298 (__mmask8)-1)) 13299 13300 #define _mm256_mask_rol_epi64(W, U, A, B) \ 13301 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \ 13302 (__v4di)(__m256i)(W), \ 13303 (__mmask8)(U))) 13304 13305 #define _mm256_maskz_rol_epi64(U, A, B) \ 13306 ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \ 13307 (__v4di)(__m256i)_mm256_setzero_si256 (),\ 13308 (__mmask8)(U))) 13309 13310 #define _mm_rol_epi64(A, B) \ 13311 ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), 
(int)(B), \
    (__v2di)(__m128i)_mm_setzero_si128 (), \
    (__mmask8)-1))

/* prolq/prorq/prold wrappers.  Plain forms: zero vector with mask -1;
   _mask_ forms: W with mask U; _maskz_ forms: zero vector with mask U.  */
#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
    (__v2di)(__m128i)(W), \
    (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
    (__v2di)(__m128i)_mm_setzero_si128 (), \
    (__mmask8)(U)))

#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
    (__v4di)(__m256i)_mm256_setzero_si256 (), \
    (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
    (__v4di)(__m256i)(W), \
    (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
    (__v4di)(__m256i)_mm256_setzero_si256 (), \
    (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
    (__v2di)(__m128i)_mm_setzero_si128 (), \
    (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
    (__v2di)(__m128i)(W), \
    (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
    (__v2di)(__m128i)_mm_setzero_si128 (), \
    (__mmask8)(U)))

#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
    (__v8si)(__m256i)_mm256_setzero_si256 (), \
    (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
    (__v8si)(__m256i)(W), \
(__mmask8)(U))) 13364 13365 #define _mm256_maskz_rol_epi32(U, A, B) \ 13366 ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \ 13367 (__v8si)(__m256i)_mm256_setzero_si256 (),\ 13368 (__mmask8)(U))) 13369 13370 #define _mm_rol_epi32(A, B) \ 13371 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \ 13372 (__v4si)(__m128i)_mm_setzero_si128 (),\ 13373 (__mmask8)-1)) 13374 13375 #define _mm_mask_rol_epi32(W, U, A, B) \ 13376 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \ 13377 (__v4si)(__m128i)(W), \ 13378 (__mmask8)(U))) 13379 13380 #define _mm_maskz_rol_epi32(U, A, B) \ 13381 ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \ 13382 (__v4si)(__m128i)_mm_setzero_si128 (),\ 13383 (__mmask8)(U))) 13384 13385 #define _mm256_ror_epi32(A, B) \ 13386 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \ 13387 (__v8si)(__m256i)_mm256_setzero_si256 (),\ 13388 (__mmask8)-1)) 13389 13390 #define _mm256_mask_ror_epi32(W, U, A, B) \ 13391 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \ 13392 (__v8si)(__m256i)(W), \ 13393 (__mmask8)(U))) 13394 13395 #define _mm256_maskz_ror_epi32(U, A, B) \ 13396 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \ 13397 (__v8si)(__m256i) \ 13398 _mm256_setzero_si256 (), \ 13399 (__mmask8)(U))) 13400 13401 #define _mm_ror_epi32(A, B) \ 13402 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \ 13403 (__v4si)(__m128i)_mm_setzero_si128 (),\ 13404 (__mmask8)-1)) 13405 13406 #define _mm_mask_ror_epi32(W, U, A, B) \ 13407 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \ 13408 (__v4si)(__m128i)(W), \ 13409 (__mmask8)(U))) 13410 13411 #define _mm_maskz_ror_epi32(U, A, B) \ 13412 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \ 13413 (__v4si)(__m128i)_mm_setzero_si128 (),\ 13414 (__mmask8)(U))) 13415 13416 #define 
_mm256_alignr_epi32(X, Y, C) \ 13417 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \ 13418 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(X), (__mmask8)-1)) 13419 13420 #define _mm256_mask_alignr_epi32(W, U, X, Y, C) \ 13421 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \ 13422 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U))) 13423 13424 #define _mm256_maskz_alignr_epi32(U, X, Y, C) \ 13425 ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \ 13426 (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (),\ 13427 (__mmask8)(U))) 13428 13429 #define _mm256_alignr_epi64(X, Y, C) \ 13430 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \ 13431 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(X), (__mmask8)-1)) 13432 13433 #define _mm256_mask_alignr_epi64(W, U, X, Y, C) \ 13434 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \ 13435 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U))) 13436 13437 #define _mm256_maskz_alignr_epi64(U, X, Y, C) \ 13438 ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \ 13439 (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (),\ 13440 (__mmask8)(U))) 13441 13442 #define _mm_alignr_epi32(X, Y, C) \ 13443 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \ 13444 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(X), (__mmask8)-1)) 13445 13446 #define _mm_mask_alignr_epi32(W, U, X, Y, C) \ 13447 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \ 13448 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U))) 13449 13450 #define _mm_maskz_alignr_epi32(U, X, Y, C) \ 13451 ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \ 13452 (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (),\ 13453 (__mmask8)(U))) 13454 13455 #define _mm_alignr_epi64(X, Y, C) \ 13456 ((__m128i)__builtin_ia32_alignq128_mask 
((__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))

/* Masked 128-bit alignq.  W is handed to the builtin as the pass-through
   vector and U as the mask, the same shape as _mm_mask_alignr_epi32 and
   the 256-bit _mask_alignr macros.  (Passing X and (__mmask8)-1 here
   would silently turn this into the unmasked _mm_alignr_epi64.)  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
    (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (), \
    (__mmask8)(U)))

/* vcvtps2ph wrappers.  Both the 128-bit and 256-bit source forms yield
   an __m128i; W (or a zero vector for _maskz_) is cast to __v8hi.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
    (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
    (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))

#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
    (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
    (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))

/* psradi wrappers: W for _mask_ forms, a zero vector for _maskz_.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
    (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
    (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
    (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
    (int)(B), \
(__v4si)_mm_setzero_si128 (), (__mmask8)(U)))

/* psraqi wrappers (plain, _mask_, _maskz_), followed by the permdf and
   256-bit vpermilpd wrappers.  Plain forms pass a zero vector with
   mask (__mmask8)-1.  */
#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
    (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
    (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
    (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
    (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)-1))

#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
    (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
    (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))

#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))

#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
    (__v4df)(__m256d)(W), \
    (__mmask8)(U)))

#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
    (__v4df)(__m256d)_mm256_setzero_pd (), \
    (__mmask8)(U)))

#define _mm256_mask_permute_ps(W, U, X, C) \
((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \ 13544 (__v8sf)(__m256)(W), (__mmask8)(U))) 13545 13546 #define _mm256_maskz_permute_ps(U, X, C) \ 13547 ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \ 13548 (__v8sf)(__m256)_mm256_setzero_ps (), \ 13549 (__mmask8)(U))) 13550 13551 #define _mm_mask_permute_pd(W, U, X, C) \ 13552 ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \ 13553 (__v2df)(__m128d)(W), (__mmask8)(U))) 13554 13555 #define _mm_maskz_permute_pd(U, X, C) \ 13556 ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \ 13557 (__v2df)(__m128d)_mm_setzero_pd (), \ 13558 (__mmask8)(U))) 13559 13560 #define _mm_mask_permute_ps(W, U, X, C) \ 13561 ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \ 13562 (__v4sf)(__m128)(W), (__mmask8)(U))) 13563 13564 #define _mm_maskz_permute_ps(U, X, C) \ 13565 ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \ 13566 (__v4sf)(__m128)_mm_setzero_ps (), \ 13567 (__mmask8)(U))) 13568 13569 #define _mm256_mask_blend_pd(__U, __A, __W) \ 13570 ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \ 13571 (__v4df) (__W), \ 13572 (__mmask8) (__U))) 13573 13574 #define _mm256_mask_blend_ps(__U, __A, __W) \ 13575 ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \ 13576 (__v8sf) (__W), \ 13577 (__mmask8) (__U))) 13578 13579 #define _mm256_mask_blend_epi64(__U, __A, __W) \ 13580 ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \ 13581 (__v4di) (__W), \ 13582 (__mmask8) (__U))) 13583 13584 #define _mm256_mask_blend_epi32(__U, __A, __W) \ 13585 ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \ 13586 (__v8si) (__W), \ 13587 (__mmask8) (__U))) 13588 13589 #define _mm_mask_blend_pd(__U, __A, __W) \ 13590 ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \ 13591 (__v2df) (__W), \ 13592 (__mmask8) (__U))) 13593 13594 #define 
_mm_mask_blend_ps(__U, __A, __W) \ 13595 ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \ 13596 (__v4sf) (__W), \ 13597 (__mmask8) (__U))) 13598 13599 #define _mm_mask_blend_epi64(__U, __A, __W) \ 13600 ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \ 13601 (__v2di) (__W), \ 13602 (__mmask8) (__U))) 13603 13604 #define _mm_mask_blend_epi32(__U, __A, __W) \ 13605 ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \ 13606 (__v4si) (__W), \ 13607 (__mmask8) (__U))) 13608 13609 #define _mm256_cmp_epu32_mask(X, Y, P) \ 13610 ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \ 13611 (__v8si)(__m256i)(Y), (int)(P),\ 13612 (__mmask8)-1)) 13613 13614 #define _mm256_cmp_epi64_mask(X, Y, P) \ 13615 ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \ 13616 (__v4di)(__m256i)(Y), (int)(P),\ 13617 (__mmask8)-1)) 13618 13619 #define _mm256_cmp_epi32_mask(X, Y, P) \ 13620 ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \ 13621 (__v8si)(__m256i)(Y), (int)(P),\ 13622 (__mmask8)-1)) 13623 13624 #define _mm256_cmp_epu64_mask(X, Y, P) \ 13625 ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \ 13626 (__v4di)(__m256i)(Y), (int)(P),\ 13627 (__mmask8)-1)) 13628 13629 #define _mm256_cmp_pd_mask(X, Y, P) \ 13630 ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \ 13631 (__v4df)(__m256d)(Y), (int)(P),\ 13632 (__mmask8)-1)) 13633 13634 #define _mm256_cmp_ps_mask(X, Y, P) \ 13635 ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \ 13636 (__v8sf)(__m256)(Y), (int)(P),\ 13637 (__mmask8)-1)) 13638 13639 #define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \ 13640 ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \ 13641 (__v4di)(__m256i)(Y), (int)(P),\ 13642 (__mmask8)(M))) 13643 13644 #define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \ 13645 ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \ 13646 (__v8si)(__m256i)(Y), (int)(P),\ 13647 (__mmask8)(M))) 13648 13649 
#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \ 13650 ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \ 13651 (__v4di)(__m256i)(Y), (int)(P),\ 13652 (__mmask8)(M))) 13653 13654 #define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \ 13655 ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \ 13656 (__v8si)(__m256i)(Y), (int)(P),\ 13657 (__mmask8)(M))) 13658 13659 #define _mm256_mask_cmp_pd_mask(M, X, Y, P) \ 13660 ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \ 13661 (__v4df)(__m256d)(Y), (int)(P),\ 13662 (__mmask8)(M))) 13663 13664 #define _mm256_mask_cmp_ps_mask(M, X, Y, P) \ 13665 ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \ 13666 (__v8sf)(__m256)(Y), (int)(P),\ 13667 (__mmask8)(M))) 13668 13669 #define _mm_cmp_epi64_mask(X, Y, P) \ 13670 ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \ 13671 (__v2di)(__m128i)(Y), (int)(P),\ 13672 (__mmask8)-1)) 13673 13674 #define _mm_cmp_epi32_mask(X, Y, P) \ 13675 ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \ 13676 (__v4si)(__m128i)(Y), (int)(P),\ 13677 (__mmask8)-1)) 13678 13679 #define _mm_cmp_epu64_mask(X, Y, P) \ 13680 ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \ 13681 (__v2di)(__m128i)(Y), (int)(P),\ 13682 (__mmask8)-1)) 13683 13684 #define _mm_cmp_epu32_mask(X, Y, P) \ 13685 ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \ 13686 (__v4si)(__m128i)(Y), (int)(P),\ 13687 (__mmask8)-1)) 13688 13689 #define _mm_cmp_pd_mask(X, Y, P) \ 13690 ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \ 13691 (__v2df)(__m128d)(Y), (int)(P),\ 13692 (__mmask8)-1)) 13693 13694 #define _mm_cmp_ps_mask(X, Y, P) \ 13695 ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \ 13696 (__v4sf)(__m128)(Y), (int)(P),\ 13697 (__mmask8)-1)) 13698 13699 #define _mm_mask_cmp_epi64_mask(M, X, Y, P) \ 13700 ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \ 13701 (__v2di)(__m128i)(Y), 
(int)(P),\ 13702 (__mmask8)(M))) 13703 13704 #define _mm_mask_cmp_epi32_mask(M, X, Y, P) \ 13705 ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \ 13706 (__v4si)(__m128i)(Y), (int)(P),\ 13707 (__mmask8)(M))) 13708 13709 #define _mm_mask_cmp_epu64_mask(M, X, Y, P) \ 13710 ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \ 13711 (__v2di)(__m128i)(Y), (int)(P),\ 13712 (__mmask8)(M))) 13713 13714 #define _mm_mask_cmp_epu32_mask(M, X, Y, P) \ 13715 ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \ 13716 (__v4si)(__m128i)(Y), (int)(P),\ 13717 (__mmask8)(M))) 13718 13719 #define _mm_mask_cmp_pd_mask(M, X, Y, P) \ 13720 ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \ 13721 (__v2df)(__m128d)(Y), (int)(P),\ 13722 (__mmask8)(M))) 13723 13724 #define _mm_mask_cmp_ps_mask(M, X, Y, P) \ 13725 ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \ 13726 (__v4sf)(__m128)(Y), (int)(P),\ 13727 (__mmask8)(M))) 13728 13729 #endif 13730 13731 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps ((B), (A)) 13732 13733 #ifdef __DISABLE_AVX512VL__ 13734 #undef __DISABLE_AVX512VL__ 13735 #pragma GCC pop_options 13736 #endif /* __DISABLE_AVX512VL__ */ 13737 13738 #endif /* _AVX512VLINTRIN_H_INCLUDED */ 13739