/* Copyright (C) 2013-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vbmi2vlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
#define _AVX512VBMI2VLINTRIN_H_INCLUDED

#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
#pragma GCC push_options
#pragma GCC target("avx512vbmi2,avx512vl")
#define __DISABLE_AVX512VBMI2VL__
#endif /* __AVX512VBMI2VL__ */

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __C,
                                                      (__v16qi) __A,
                                                      (__mmask16) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi8 (__mmask16 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __B,
                                                      (__v16qi) _mm_setzero_si128 (),
                                                      (__mmask16) __A);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi16 (void * __A, __mmask16 __B, __m256i __C)
{
  __builtin_ia32_compressstoreuhi256_mask ((__v16hi *) __A, (__v16hi) __C,
                                           (__mmask16) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compress_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __C,
                                                      (__v8hi) __A,
                                                      (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_compress_epi16 (__mmask8 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __B,
                                                      (__v8hi) _mm_setzero_si128 (),
                                                      (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __C,
                                                      (__v16hi) __A,
                                                      (__mmask16) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi16 (__mmask16 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __B,
                                                      (__v16hi) _mm256_setzero_si256 (),
                                                      (__mmask16) __A);
}
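/* Usage sketch (illustrative; variable names are hypothetical): compress
   left-packs the elements selected by the mask into the low lanes.  With
   mask 0xF0, the four words in bits [127:64] of `v` end up in the low four
   word lanes and the remaining lanes are zeroed:

     __m128i packed = _mm_maskz_compress_epi16 ((__mmask8) 0xF0, v);  */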
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi8 (void * __A, __mmask16 __B, __m128i __C)
{
  __builtin_ia32_compressstoreuqi128_mask ((__v16qi *) __A, (__v16qi) __C,
                                           (__mmask16) __B);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_compressstoreu_epi16 (void * __A, __mmask8 __B, __m128i __C)
{
  __builtin_ia32_compressstoreuhi128_mask ((__v8hi *) __A, (__v8hi) __C,
                                           (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi8 (__m128i __A, __mmask16 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __C,
                                                    (__v16qi) __A,
                                                    (__mmask16) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi8 (__mmask16 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_expandqi128_maskz ((__v16qi) __B,
                                                     (__v16qi) _mm_setzero_si128 (),
                                                     (__mmask16) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi8 (__m128i __A, __mmask16 __B, const void * __C)
{
  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *) __C,
                                                        (__v16qi) __A,
                                                        (__mmask16) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi8 (__mmask16 __A, const void * __B)
{
  return (__m128i) __builtin_ia32_expandloadqi128_maskz ((const __v16qi *) __B,
                                                         (__v16qi) _mm_setzero_si128 (),
                                                         (__mmask16) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expand_epi16 (__m128i __A, __mmask8 __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __C,
                                                    (__v8hi) __A,
                                                    (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expand_epi16 (__mmask8 __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_expandhi128_maskz ((__v8hi) __B,
                                                     (__v8hi) _mm_setzero_si128 (),
                                                     (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_expandloadu_epi16 (__m128i __A, __mmask8 __B, const void * __C)
{
  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *) __C,
                                                        (__v8hi) __A,
                                                        (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_expandloadu_epi16 (__mmask8 __A, const void * __B)
{
  return (__m128i) __builtin_ia32_expandloadhi128_maskz ((const __v8hi *) __B,
                                                         (__v8hi) _mm_setzero_si128 (),
                                                         (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi16 (__m256i __A, __mmask16 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __C,
                                                    (__v16hi) __A,
                                                    (__mmask16) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi16 (__mmask16 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_expandhi256_maskz ((__v16hi) __B,
                                                     (__v16hi) _mm256_setzero_si256 (),
                                                     (__mmask16) __A);
}
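/* Usage sketch (illustrative): expand is the inverse of compress.  It takes
   elements from the low lanes of the source and scatters them to the lanes
   selected by the mask.  Compressing and then expanding with the same mask
   therefore restores the selected lanes and zeroes the rest.  Here `m` and
   `v` are hypothetical caller-supplied values:

     __m256i t = _mm256_maskz_compress_epi16 (m, v);
     __m256i r = _mm256_maskz_expand_epi16 (m, t);  */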
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi16 (__m256i __A, __mmask16 __B, const void * __C)
{
  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *) __C,
                                                        (__v16hi) __A,
                                                        (__mmask16) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi16 (__mmask16 __A, const void * __B)
{
  return (__m256i) __builtin_ia32_expandloadhi256_maskz ((const __v16hi *) __B,
                                                         (__v16hi) _mm256_setzero_si256 (),
                                                         (__mmask16) __A);
}

#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi16 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi) __A, (__v16hi) __B,
                                                __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
                         int __E)
{
  return (__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi) __C,
                                                     (__v16hi) __D, __E,
                                                     (__v16hi) __A,
                                                     (__mmask16) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi) __B,
                                                     (__v16hi) __C, __D,
                                                     (__v16hi) _mm256_setzero_si256 (),
                                                     (__mmask16) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
                         int __E)
{
  return (__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si) __C, (__v8si) __D,
                                                    __E, (__v8si) __A,
                                                    (__mmask8) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si) __B, (__v8si) __C,
                                                    __D,
                                                    (__v8si) _mm256_setzero_si256 (),
                                                    (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi32 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshrd_v8si ((__v8si) __A, (__v8si) __B, __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
                         int __E)
{
  return (__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di) __C, (__v4di) __D,
                                                    __E, (__v4di) __A,
                                                    (__mmask8) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di) __B, (__v4di) __C,
                                                    __D,
                                                    (__v4di) _mm256_setzero_si256 (),
                                                    (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdi_epi64 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshrd_v4di ((__v4di) __A, (__v4di) __B, __C);
}
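/* Semantics sketch for the shrdi family, following Intel's description of
   VPSHRDW/VPSHRDD/VPSHRDQ: each result lane is the low half of the
   double-width value formed by concatenating the lane of the second operand
   (high part) with the lane of the first operand (low part), shifted right
   by the immediate.  A scalar model for one 32-bit lane, assuming
   0 <= __C < 32:

     dst = (unsigned int) ((((unsigned long long) __B << 32) | __A) >> __C);  */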
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
                      int __E)
{
  return (__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi) __C, (__v8hi) __D,
                                                    __E, (__v8hi) __A,
                                                    (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi) __B, (__v8hi) __C,
                                                    __D,
                                                    (__v8hi) _mm_setzero_si128 (),
                                                    (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi16 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi) __A, (__v8hi) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
                      int __E)
{
  return (__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si) __C, (__v4si) __D,
                                                    __E, (__v4si) __A,
                                                    (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si) __B, (__v4si) __C,
                                                    __D,
                                                    (__v4si) _mm_setzero_si128 (),
                                                    (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi32 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshrd_v4si ((__v4si) __A, (__v4si) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
                      int __E)
{
  return (__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di) __C, (__v2di) __D,
                                                    __E, (__v2di) __A,
                                                    (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di) __B, (__v2di) __C,
                                                    __D,
                                                    (__v2di) _mm_setzero_si128 (),
                                                    (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdi_epi64 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshrd_v2di ((__v2di) __A, (__v2di) __B, __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi16 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi) __A, (__v16hi) __B,
                                                __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
                         int __E)
{
  return (__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi) __C,
                                                     (__v16hi) __D, __E,
                                                     (__v16hi) __A,
                                                     (__mmask16) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi) __B,
                                                     (__v16hi) __C, __D,
                                                     (__v16hi) _mm256_setzero_si256 (),
                                                     (__mmask16) __A);
}
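/* Semantics sketch for the shldi family: concatenate the lane of the first
   operand (high part) with the lane of the second operand (low part), shift
   the pair left by the immediate and keep the high half.  A scalar model for
   one 16-bit lane, assuming 0 <= __C < 16:

     dst = (unsigned short) (((((unsigned int) __A << 16) | __B) << __C) >> 16);  */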
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
                         int __E)
{
  return (__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si) __C, (__v8si) __D,
                                                    __E, (__v8si) __A,
                                                    (__mmask8) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si) __B, (__v8si) __C,
                                                    __D,
                                                    (__v8si) _mm256_setzero_si256 (),
                                                    (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi32 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshld_v8si ((__v8si) __A, (__v8si) __B, __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
                         int __E)
{
  return (__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di) __C, (__v4di) __D,
                                                    __E, (__v4di) __A,
                                                    (__mmask8) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
{
  return (__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di) __B, (__v4di) __C,
                                                    __D,
                                                    (__v4di) _mm256_setzero_si256 (),
                                                    (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldi_epi64 (__m256i __A, __m256i __B, int __C)
{
  return (__m256i) __builtin_ia32_vpshld_v4di ((__v4di) __A, (__v4di) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
                      int __E)
{
  return (__m128i) __builtin_ia32_vpshld_v8hi_mask ((__v8hi) __C, (__v8hi) __D,
                                                    __E, (__v8hi) __A,
                                                    (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i) __builtin_ia32_vpshld_v8hi_mask ((__v8hi) __B, (__v8hi) __C,
                                                    __D,
                                                    (__v8hi) _mm_setzero_si128 (),
                                                    (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi16 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi) __A, (__v8hi) __B, __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
                      int __E)
{
  return (__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si) __C, (__v4si) __D,
                                                    __E, (__v4si) __A,
                                                    (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si) __B, (__v4si) __C,
                                                    __D,
                                                    (__v4si) _mm_setzero_si128 (),
                                                    (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi32 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshld_v4si ((__v4si) __A, (__v4si) __B, __C);
}
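/* Usage sketch (illustrative): passing the same vector as both data operands
   turns the funnel shift into a per-lane rotate, e.g. a rotate-left by 5 of
   each 32-bit lane of a hypothetical `v`:

     __m128i rot = _mm_shldi_epi32 (v, v, 5);  */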
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
                      int __E)
{
  return (__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di) __C, (__v2di) __D,
                                                    __E, (__v2di) __A,
                                                    (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
{
  return (__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di) __B, (__v2di) __C,
                                                    __D,
                                                    (__v2di) _mm_setzero_si128 (),
                                                    (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
{
  return (__m128i) __builtin_ia32_vpshld_v2di ((__v2di) __A, (__v2di) __B, __C);
}
#else
#define _mm256_shrdi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)(__m256i)(A), \
                                          (__v16hi)(__m256i)(B), (int)(C)))
#define _mm256_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(C), \
                                               (__v16hi)(__m256i)(D), \
                                               (int)(E), \
                                               (__v16hi)(__m256i)(A), \
                                               (__mmask16)(B)))
#define _mm256_maskz_shrdi_epi16(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(B), \
                                     (__v16hi)(__m256i)(C), (int)(D), \
                                     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
                                     (__mmask16)(A)))
#define _mm256_shrdi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)(__m256i)(A), \
                                         (__v8si)(__m256i)(B), (int)(C)))
#define _mm256_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(C), \
                                              (__v8si)(__m256i)(D), \
                                              (int)(E), \
                                              (__v8si)(__m256i)(A), \
                                              (__mmask8)(B)))
#define _mm256_maskz_shrdi_epi32(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(B), \
                                    (__v8si)(__m256i)(C), (int)(D), \
                                    (__v8si)(__m256i)_mm256_setzero_si256 (), \
                                    (__mmask8)(A)))
#define _mm256_shrdi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)(__m256i)(A), \
                                         (__v4di)(__m256i)(B), (int)(C)))
#define _mm256_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(C), \
                                              (__v4di)(__m256i)(D), (int)(E), \
                                              (__v4di)(__m256i)(A), \
                                              (__mmask8)(B)))
#define _mm256_maskz_shrdi_epi64(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(B), \
                                    (__v4di)(__m256i)(C), (int)(D), \
                                    (__v4di)(__m256i)_mm256_setzero_si256 (), \
                                    (__mmask8)(A)))
#define _mm_shrdi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)(__m128i)(A), \
                                         (__v8hi)(__m128i)(B), (int)(C)))
#define _mm_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(C), \
                                              (__v8hi)(__m128i)(D), (int)(E), \
                                              (__v8hi)(__m128i)(A), \
                                              (__mmask8)(B)))
#define _mm_maskz_shrdi_epi16(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(B), \
                                    (__v8hi)(__m128i)(C), (int)(D), \
                                    (__v8hi)(__m128i)_mm_setzero_si128 (), \
                                    (__mmask8)(A)))
#define _mm_shrdi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)(__m128i)(A), \
                                         (__v4si)(__m128i)(B), (int)(C)))
#define _mm_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
                                              (__v4si)(__m128i)(D), (int)(E), \
                                              (__v4si)(__m128i)(A), \
                                              (__mmask8)(B)))
#define _mm_maskz_shrdi_epi32(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
                                    (__v4si)(__m128i)(C), (int)(D), \
                                    (__v4si)(__m128i)_mm_setzero_si128 (), \
                                    (__mmask8)(A)))
#define _mm_shrdi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)(__m128i)(A), \
                                         (__v2di)(__m128i)(B), (int)(C)))
#define _mm_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(C), \
                                              (__v2di)(__m128i)(D), (int)(E), \
                                              (__v2di)(__m128i)(A), \
                                              (__mmask8)(B)))
#define _mm_maskz_shrdi_epi64(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(B), \
                                    (__v2di)(__m128i)(C), (int)(D), \
                                    (__v2di)(__m128i)_mm_setzero_si128 (), \
                                    (__mmask8)(A)))
#define _mm256_shldi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)(__m256i)(A), \
                                          (__v16hi)(__m256i)(B), (int)(C)))
#define _mm256_mask_shldi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(C), \
                                               (__v16hi)(__m256i)(D), \
                                               (int)(E), \
                                               (__v16hi)(__m256i)(A), \
                                               (__mmask16)(B)))
#define _mm256_maskz_shldi_epi16(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(B), \
                                     (__v16hi)(__m256i)(C), (int)(D), \
                                     (__v16hi)(__m256i)_mm256_setzero_si256 (), \
                                     (__mmask16)(A)))
#define _mm256_shldi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v8si ((__v8si)(__m256i)(A), \
                                         (__v8si)(__m256i)(B), (int)(C)))
#define _mm256_mask_shldi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(C), \
                                              (__v8si)(__m256i)(D), (int)(E), \
                                              (__v8si)(__m256i)(A), \
                                              (__mmask8)(B)))
#define _mm256_maskz_shldi_epi32(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(B), \
                                    (__v8si)(__m256i)(C), (int)(D), \
                                    (__v8si)(__m256i)_mm256_setzero_si256 (), \
                                    (__mmask8)(A)))
#define _mm256_shldi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v4di ((__v4di)(__m256i)(A), \
                                         (__v4di)(__m256i)(B), (int)(C)))
#define _mm256_mask_shldi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(C), \
                                              (__v4di)(__m256i)(D), (int)(E), \
                                              (__v4di)(__m256i)(A), \
                                              (__mmask8)(B)))
#define _mm256_maskz_shldi_epi64(A, B, C, D) \
  ((__m256i) \
   __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(B), \
                                    (__v4di)(__m256i)(C), (int)(D), \
                                    (__v4di)(__m256i)_mm256_setzero_si256 (), \
                                    (__mmask8)(A)))
#define _mm_shldi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)(__m128i)(A), \
                                         (__v8hi)(__m128i)(B), (int)(C)))
#define _mm_mask_shldi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(C), \
                                              (__v8hi)(__m128i)(D), (int)(E), \
                                              (__v8hi)(__m128i)(A), \
                                              (__mmask8)(B)))
#define _mm_maskz_shldi_epi16(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(B), \
                                    (__v8hi)(__m128i)(C), (int)(D), \
                                    (__v8hi)(__m128i)_mm_setzero_si128 (), \
                                    (__mmask8)(A)))
#define _mm_shldi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v4si ((__v4si)(__m128i)(A), \
                                         (__v4si)(__m128i)(B), (int)(C)))
#define _mm_mask_shldi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
                                              (__v4si)(__m128i)(D), (int)(E), \
                                              (__v4si)(__m128i)(A), \
                                              (__mmask8)(B)))
#define _mm_maskz_shldi_epi32(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \
                                    (__v4si)(__m128i)(C), (int)(D), \
                                    (__v4si)(__m128i)_mm_setzero_si128 (), \
                                    (__mmask8)(A)))
#define _mm_shldi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v2di ((__v2di)(__m128i)(A), \
                                         (__v2di)(__m128i)(B), (int)(C)))
#define _mm_mask_shldi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(C), \
                                              (__v2di)(__m128i)(D), (int)(E), \
                                              (__v2di)(__m128i)(A), \
                                              (__mmask8)(B)))
#define _mm_maskz_shldi_epi64(A, B, C, D) \
  ((__m128i) \
   __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(B), \
                                    (__v2di)(__m128i)(C), (int)(D), \
                                    (__v2di)(__m128i)_mm_setzero_si128 (), \
                                    (__mmask8)(A)))
#endif
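/* Note: the underlying vpshld/vpshrd builtins require the shift count to be
   a compile-time constant.  Without __OPTIMIZE__ the always-inline wrappers
   are not folded, so the macro forms above are used instead; either way the
   count argument must be a literal constant, e.g. with hypothetical inputs
   `a` and `b`:

     __m128i r = _mm_shrdi_epi32 (a, b, 7);

   For per-lane variable counts, use the shldv/shrdv intrinsics below.  */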
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi16 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdv_v16hi ((__v16hi) __A, (__v16hi) __B,
                                                 (__v16hi) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshrdv_v16hi_mask ((__v16hi) __A,
                                                      (__v16hi) __C,
                                                      (__v16hi) __D,
                                                      (__mmask16) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshrdv_v16hi_maskz ((__v16hi) __B,
                                                       (__v16hi) __C,
                                                       (__v16hi) __D,
                                                       (__mmask16) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi32 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdv_v8si ((__v8si) __A, (__v8si) __B,
                                                (__v8si) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshrdv_v8si_mask ((__v8si) __A, (__v8si) __C,
                                                     (__v8si) __D, (__mmask8) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshrdv_v8si_maskz ((__v8si) __B, (__v8si) __C,
                                                      (__v8si) __D, (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shrdv_epi64 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshrdv_v4di ((__v4di) __A, (__v4di) __B,
                                                (__v4di) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shrdv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshrdv_v4di_mask ((__v4di) __A, (__v4di) __C,
                                                     (__v4di) __D, (__mmask8) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shrdv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshrdv_v4di_maskz ((__v4di) __B, (__v4di) __C,
                                                      (__v4di) __D, (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdv_v8hi ((__v8hi) __A, (__v8hi) __B,
                                                (__v8hi) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshrdv_v8hi_mask ((__v8hi) __A, (__v8hi) __C,
                                                     (__v8hi) __D, (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshrdv_v8hi_maskz ((__v8hi) __B, (__v8hi) __C,
                                                      (__v8hi) __D, (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdv_v4si ((__v4si) __A, (__v4si) __B,
                                                (__v4si) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshrdv_v4si_mask ((__v4si) __A, (__v4si) __C,
                                                     (__v4si) __D, (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshrdv_v4si_maskz ((__v4si) __B, (__v4si) __C,
                                                      (__v4si) __D, (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shrdv_epi64 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshrdv_v2di ((__v2di) __A, (__v2di) __B,
                                                (__v2di) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shrdv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshrdv_v2di_mask ((__v2di) __A, (__v2di) __C,
                                                     (__v2di) __D, (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shrdv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshrdv_v2di_maskz ((__v2di) __B, (__v2di) __C,
                                                      (__v2di) __D, (__mmask8) __A);
}
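/* Usage sketch (illustrative): the shrdv forms take one shift count per lane
   from the third operand; counts are interpreted modulo the lane width.
   For example, funnel-shifting each 32-bit lane pair right by a different
   amount, with hypothetical inputs `a` and `b`:

     __m128i cnt = _mm_setr_epi32 (1, 8, 16, 31);
     __m128i r   = _mm_shrdv_epi32 (a, b, cnt);  */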
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi16 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldv_v16hi ((__v16hi) __A, (__v16hi) __B,
                                                 (__v16hi) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldv_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshldv_v16hi_mask ((__v16hi) __A,
                                                      (__v16hi) __C,
                                                      (__v16hi) __D,
                                                      (__mmask16) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldv_epi16 (__mmask16 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshldv_v16hi_maskz ((__v16hi) __B,
                                                       (__v16hi) __C,
                                                       (__v16hi) __D,
                                                       (__mmask16) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi32 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldv_v8si ((__v8si) __A, (__v8si) __B,
                                                (__v8si) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldv_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshldv_v8si_mask ((__v8si) __A, (__v8si) __C,
                                                     (__v8si) __D, (__mmask8) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldv_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshldv_v8si_maskz ((__v8si) __B, (__v8si) __C,
                                                      (__v8si) __D, (__mmask8) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shldv_epi64 (__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpshldv_v4di ((__v4di) __A, (__v4di) __B,
                                                (__v4di) __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_shldv_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshldv_v4di_mask ((__v4di) __A, (__v4di) __C,
                                                     (__v4di) __D, (__mmask8) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_shldv_epi64 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i) __builtin_ia32_vpshldv_v4di_maskz ((__v4di) __B, (__v4di) __C,
                                                      (__v4di) __D, (__mmask8) __A);
}
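/* Usage sketch (illustrative): as with the immediate forms, using the same
   vector for both data operands gives a per-lane variable rotate.  With
   hypothetical `v` and `counts`, each 32-bit lane of `v` is rotated left by
   the corresponding lane of `counts`, modulo 32:

     __m256i rot = _mm256_shldv_epi32 (v, v, counts);  */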
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi16 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldv_v8hi ((__v8hi) __A, (__v8hi) __B,
                                                (__v8hi) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshldv_v8hi_mask ((__v8hi) __A, (__v8hi) __C,
                                                     (__v8hi) __D, (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi16 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshldv_v8hi_maskz ((__v8hi) __B, (__v8hi) __C,
                                                      (__v8hi) __D, (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi32 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldv_v4si ((__v4si) __A, (__v4si) __B,
                                                (__v4si) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshldv_v4si_mask ((__v4si) __A, (__v4si) __C,
                                                     (__v4si) __D, (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshldv_v4si_maskz ((__v4si) __B, (__v4si) __C,
                                                      (__v4si) __D, (__mmask8) __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shldv_epi64 (__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpshldv_v2di ((__v2di) __A, (__v2di) __B,
                                                (__v2di) __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_shldv_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshldv_v2di_mask ((__v2di) __A, (__v2di) __C,
                                                     (__v2di) __D, (__mmask8) __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_shldv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i) __builtin_ia32_vpshldv_v2di_maskz ((__v2di) __B, (__v2di) __C,
                                                      (__v2di) __D, (__mmask8) __A);
}

#ifdef __DISABLE_AVX512VBMI2VL__
#undef __DISABLE_AVX512VBMI2VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMI2VL__ */

#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || \
    !defined(__AVX512BW__)
#pragma GCC push_options
#pragma GCC target("avx512vbmi2,avx512vl,avx512bw")
#define __DISABLE_AVX512VBMI2VLBW__
#endif /* __AVX512VBMI2VLBW__ */
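/* The 256-bit byte-granularity compress/expand intrinsics below use a
   32-bit mask type and therefore additionally require AVX512BW.  A sketch
   of a suitable compile invocation (the file name is illustrative):

     gcc -mavx512vbmi2 -mavx512vl -mavx512bw demo.c  */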
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compress_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __C,
                                                      (__v32qi) __A,
                                                      (__mmask32) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_compress_epi8 (__mmask32 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __B,
                                                      (__v32qi) _mm256_setzero_si256 (),
                                                      (__mmask32) __A);
}

extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_compressstoreu_epi8 (void * __A, __mmask32 __B, __m256i __C)
{
  __builtin_ia32_compressstoreuqi256_mask ((__v32qi *) __A, (__v32qi) __C,
                                           (__mmask32) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expand_epi8 (__m256i __A, __mmask32 __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __C,
                                                    (__v32qi) __A,
                                                    (__mmask32) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expand_epi8 (__mmask32 __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_expandqi256_maskz ((__v32qi) __B,
                                                     (__v32qi) _mm256_setzero_si256 (),
                                                     (__mmask32) __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_expandloadu_epi8 (__m256i __A, __mmask32 __B, const void * __C)
{
  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *) __C,
                                                        (__v32qi) __A,
                                                        (__mmask32) __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_expandloadu_epi8 (__mmask32 __A, const void * __B)
{
  return (__m256i) __builtin_ia32_expandloadqi256_maskz ((const __v32qi *) __B,
                                                         (__v32qi) _mm256_setzero_si256 (),
                                                         (__mmask32) __A);
}

#ifdef __DISABLE_AVX512VBMI2VLBW__
#undef __DISABLE_AVX512VBMI2VLBW__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMI2VLBW__ */

#endif /* _AVX512VBMI2VLINTRIN_H_INCLUDED */