Lines Matching defs:__a
67 /// \param __a
74 _mm256_add_pd(__m256d __a, __m256d __b)
76 return (__m256d)((__v4df)__a+(__v4df)__b);
85 /// \param __a
92 _mm256_add_ps(__m256 __a, __m256 __b)
94 return (__m256)((__v8sf)__a+(__v8sf)__b);
103 /// \param __a
110 _mm256_sub_pd(__m256d __a, __m256d __b)
112 return (__m256d)((__v4df)__a-(__v4df)__b);
121 /// \param __a
128 _mm256_sub_ps(__m256 __a, __m256 __b)
130 return (__m256)((__v8sf)__a-(__v8sf)__b);
140 /// \param __a
147 _mm256_addsub_pd(__m256d __a, __m256d __b)
149 return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
159 /// \param __a
166 _mm256_addsub_ps(__m256 __a, __m256 __b)
168 return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
177 /// \param __a
184 _mm256_div_pd(__m256d __a, __m256d __b)
186 return (__m256d)((__v4df)__a/(__v4df)__b);
195 /// \param __a
202 _mm256_div_ps(__m256 __a, __m256 __b)
204 return (__m256)((__v8sf)__a/(__v8sf)__b);
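The add/sub/addsub/div intrinsics above are straight element-wise operations (addsub subtracts in the even-indexed positions and adds in the odd-indexed ones). A minimal usage sketch, assuming a compiler with AVX enabled (e.g. -mavx); the helper name is illustrative:

#include <immintrin.h>

/* Element-wise arithmetic on four packed doubles. */
static __m256d add_sub_div_demo(__m256d a, __m256d b)
{
    __m256d sum  = _mm256_add_pd(a, b);      /* a[i] + b[i]                  */
    __m256d diff = _mm256_sub_pd(a, b);      /* a[i] - b[i]                  */
    __m256d asub = _mm256_addsub_pd(a, b);   /* {a0-b0, a1+b1, a2-b2, a3+b3} */
    (void)asub;
    return _mm256_div_pd(sum, diff);         /* (a[i]+b[i]) / (a[i]-b[i])    */
}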
216 /// \param __a
223 _mm256_max_pd(__m256d __a, __m256d __b)
225 return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
237 /// \param __a
244 _mm256_max_ps(__m256 __a, __m256 __b)
246 return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
258 /// \param __a
265 _mm256_min_pd(__m256d __a, __m256d __b)
267 return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
279 /// \param __a
286 _mm256_min_ps(__m256 __a, __m256 __b)
288 return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
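A common use of the max/min pair is clamping each element to a range; a minimal sketch (the helper name is illustrative):

#include <immintrin.h>

/* Clamp each of the eight floats in v to [lo, hi]. */
static __m256 clamp_ps(__m256 v, __m256 lo, __m256 hi)
{
    return _mm256_min_ps(_mm256_max_ps(v, lo), hi);
}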
297 /// \param __a
304 _mm256_mul_pd(__m256d __a, __m256d __b)
306 return (__m256d)((__v4df)__a * (__v4df)__b);
315 /// \param __a
322 _mm256_mul_ps(__m256 __a, __m256 __b)
324 return (__m256)((__v8sf)__a * (__v8sf)__b);
334 /// \param __a
339 _mm256_sqrt_pd(__m256d __a)
341 return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
351 /// \param __a
356 _mm256_sqrt_ps(__m256 __a)
358 return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
368 /// \param __a
373 _mm256_rsqrt_ps(__m256 __a)
375 return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
385 /// \param __a
390 _mm256_rcp_ps(__m256 __a)
392 return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
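_mm256_sqrt_ps/pd are correctly rounded, while _mm256_rsqrt_ps and _mm256_rcp_ps are fast approximations (about 12 bits of relative precision). A hedged sketch of the usual Newton-Raphson refinement of the reciprocal square root; _mm256_set1_ps comes from the same header family but is not part of this listing, and the helper name is illustrative:

#include <immintrin.h>

/* One Newton-Raphson step on top of the rsqrt estimate: y' = 0.5*y*(3 - x*y*y). */
static __m256 rsqrt_refined_ps(__m256 x)
{
    __m256 y     = _mm256_rsqrt_ps(x);                        /* ~12-bit estimate */
    __m256 xyy   = _mm256_mul_ps(x, _mm256_mul_ps(y, y));
    __m256 half  = _mm256_set1_ps(0.5f);
    __m256 three = _mm256_set1_ps(3.0f);
    return _mm256_mul_ps(_mm256_mul_ps(half, y),
                         _mm256_sub_ps(three, xyy));
}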
535 /// \param __a
542 _mm256_and_pd(__m256d __a, __m256d __b)
544 return (__m256d)((__v4du)__a & (__v4du)__b);
553 /// \param __a
560 _mm256_and_ps(__m256 __a, __m256 __b)
562 return (__m256)((__v8su)__a & (__v8su)__b);
572 /// \param __a
581 _mm256_andnot_pd(__m256d __a, __m256d __b)
583 return (__m256d)(~(__v4du)__a & (__v4du)__b);
593 /// \param __a
602 _mm256_andnot_ps(__m256 __a, __m256 __b)
604 return (__m256)(~(__v8su)__a & (__v8su)__b);
613 /// \param __a
620 _mm256_or_pd(__m256d __a, __m256d __b)
622 return (__m256d)((__v4du)__a | (__v4du)__b);
631 /// \param __a
638 _mm256_or_ps(__m256 __a, __m256 __b)
640 return (__m256)((__v8su)__a | (__v8su)__b);
649 /// \param __a
656 _mm256_xor_pd(__m256d __a, __m256d __b)
658 return (__m256d)((__v4du)__a ^ (__v4du)__b);
667 /// \param __a
674 _mm256_xor_ps(__m256 __a, __m256 __b)
676 return (__m256)((__v8su)__a ^ (__v8su)__b);
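Since these work on raw bit patterns, a typical use is sign-bit manipulation. A minimal sketch; the helper names are illustrative and _mm256_set1_pd is assumed from the same header:

#include <immintrin.h>

/* |v|: clear each sign bit. andnot computes ~first & second, so the mask goes first. */
static __m256d abs_pd(__m256d v)
{
    return _mm256_andnot_pd(_mm256_set1_pd(-0.0), v);
}

/* -v: flip each sign bit. */
static __m256d neg_pd(__m256d v)
{
    return _mm256_xor_pd(v, _mm256_set1_pd(-0.0));
}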
687 /// \param __a
698 _mm256_hadd_pd(__m256d __a, __m256d __b)
700 return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
710 /// \param __a
721 _mm256_hadd_ps(__m256 __a, __m256 __b)
723 return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
733 /// \param __a
744 _mm256_hsub_pd(__m256d __a, __m256d __b)
746 return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
756 /// \param __a
767 _mm256_hsub_ps(__m256 __a, __m256 __b)
769 return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
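The horizontal forms operate within each 128-bit lane rather than across the whole vector; a sketch showing the resulting order (helper name illustrative, _mm256_set_pd assumed from the same header):

#include <immintrin.h>

static __m256d hadd_demo(void)
{
    __m256d a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);   /* elements 0..3 = {1,2,3,4} */
    __m256d b = _mm256_set_pd(8.0, 7.0, 6.0, 5.0);   /* elements 0..3 = {5,6,7,8} */
    /* Per lane: {a0+a1, b0+b1, a2+a3, b2+b3} = {3, 11, 7, 15} */
    return _mm256_hadd_pd(a, b);
}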
780 /// \param __a
797 _mm_permutevar_pd(__m128d __a, __m128i __c)
799 return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
809 /// \param __a
836 _mm256_permutevar_pd(__m256d __a, __m256i __c)
838 return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
848 /// \param __a
891 _mm_permutevar_ps(__m128 __a, __m128i __c)
893 return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
903 /// \param __a
982 _mm256_permutevar_ps(__m256 __a, __m256i __c)
984 return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
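The permutevar forms take their selectors from a vector at run time, two bits per float, and shuffle within each 128-bit lane. A sketch (helper name illustrative; _mm256_setr_epi32 is assumed from the same header family):

#include <immintrin.h>

/* Reverse the four floats inside each 128-bit lane. */
static __m256 reverse_within_lanes(__m256 v)
{
    __m256i ctrl = _mm256_setr_epi32(3, 2, 1, 0, 3, 2, 1, 0);
    return _mm256_permutevar_ps(v, ctrl);
}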
1392 /// \param __a
1400 /// corresponding 64-bit element in operand \a __a is copied to the same
1406 _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
1409 (__v4df)__a, (__v4df)__b, (__v4df)__c);
1420 /// \param __a
1428 /// a mask bit is 0, the corresponding 32-bit element in operand \a __a is
1434 _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
1437 (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
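Only the sign bit of each mask element matters, so a compare result can drive the blend directly. A sketch (helper name illustrative; _mm256_cmp_pd and _CMP_LT_OQ are assumed from the same header):

#include <immintrin.h>

/* Element-wise maximum written as compare + blend. */
static __m256d select_larger(__m256d a, __m256d b)
{
    __m256d mask = _mm256_cmp_pd(a, b, _CMP_LT_OQ);  /* all-ones where a < b      */
    return _mm256_blendv_pd(a, b, mask);             /* mask set: take b; else a  */
}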
2173 /// \param __a
2177 _mm256_cvtepi32_pd(__m128i __a)
2179 return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
2188 /// \param __a
2192 _mm256_cvtepi32_ps(__m256i __a)
2194 return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);
2204 /// \param __a
2208 _mm256_cvtpd_ps(__m256d __a)
2210 return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
2223 /// \param __a
2227 _mm256_cvtps_epi32(__m256 __a)
2229 return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
2239 /// \param __a
2243 _mm256_cvtps_pd(__m128 __a)
2245 return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
2260 /// \param __a
2264 _mm256_cvttpd_epi32(__m256d __a)
2266 return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
2280 /// \param __a
2284 _mm256_cvtpd_epi32(__m256d __a)
2286 return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
2300 /// \param __a
2304 _mm256_cvttps_epi32(__m256 __a)
2306 return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
2316 /// \param __a
2320 _mm256_cvtsd_f64(__m256d __a)
2322 return __a[0];
2332 /// \param __a
2336 _mm256_cvtsi256_si32(__m256i __a)
2338 __v8si __b = (__v8si)__a;
2349 /// \param __a
2353 _mm256_cvtss_f32(__m256 __a)
2355 return __a[0];
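A sketch that exercises several of the conversions above; the truncating (cvtt) and rounding (cvt) variants are noted in the comments, and _mm_setr_epi32 is assumed from the SSE2 header:

#include <immintrin.h>

static void convert_demo(void)
{
    __m128i i32   = _mm_setr_epi32(1, 2, 3, 4);
    __m256d d     = _mm256_cvtepi32_pd(i32);     /* widen 4 x int32 -> 4 x double  */
    __m128  s     = _mm256_cvtpd_ps(d);          /* narrow 4 x double -> 4 x float */
    __m128i trunc = _mm256_cvttpd_epi32(d);      /* truncate toward zero           */
    __m128i round = _mm256_cvtpd_epi32(d);       /* round using the current mode   */
    double  first = _mm256_cvtsd_f64(d);         /* extract element 0              */
    (void)s; (void)trunc; (void)round; (void)first;
}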
2366 /// \param __a
2368 /// Bits [255:224] of \a __a are written to bits [255:224] and [223:192] of
2370 /// Bits [191:160] of \a __a are written to bits [191:160] and [159:128] of
2372 /// Bits [127:96] of \a __a are written to bits [127:96] and [95:64] of the
2374 /// Bits [63:32] of \a __a are written to bits [63:32] and [31:0] of the
2379 _mm256_movehdup_ps(__m256 __a)
2381 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
2391 /// \param __a
2393 /// Bits [223:192] of \a __a are written to bits [255:224] and [223:192] of
2395 /// Bits [159:128] of \a __a are written to bits [191:160] and [159:128] of
2397 /// Bits [95:64] of \a __a are written to bits [127:96] and [95:64] of the
2399 /// Bits [31:0] of \a __a are written to bits [63:32] and [31:0] of the
2404 _mm256_moveldup_ps(__m256 __a)
2406 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
2417 /// \param __a
2419 /// Bits [63:0] of \a __a are written to bits [127:64] and [63:0] of the
2421 /// Bits [191:128] of \a __a are written to bits [255:192] and [191:128] of
2426 _mm256_movedup_pd(__m256d __a)
2428 return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);
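A sketch showing which elements each duplicate form repeats (_mm256_setr_ps and _mm256_setr_pd assumed from the same header; helper name illustrative):

#include <immintrin.h>

static void dup_demo(void)
{
    __m256 v    = _mm256_setr_ps(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
    __m256 odd  = _mm256_movehdup_ps(v);   /* {1,1,3,3,5,5,7,7} */
    __m256 even = _mm256_moveldup_ps(v);   /* {0,0,2,2,4,4,6,6} */

    __m256d d  = _mm256_setr_pd(10.0, 11.0, 12.0, 13.0);
    __m256d lo = _mm256_movedup_pd(d);     /* {10,10,12,12} */
    (void)odd; (void)even; (void)lo;
}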
2439 /// \param __a
2449 _mm256_unpackhi_pd(__m256d __a, __m256d __b)
2451 return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);
2461 /// \param __a
2471 _mm256_unpacklo_pd(__m256d __a, __m256d __b)
2473 return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);
2484 /// \param __a
2498 _mm256_unpackhi_ps(__m256 __a, __m256 __b)
2500 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
2511 /// \param __a
2525 _mm256_unpacklo_ps(__m256 __a, __m256 __b)
2527 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
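As with the horizontal adds, unpack interleaves within each 128-bit lane. A sketch of the element order (helper name illustrative):

#include <immintrin.h>

static void unpack_demo(void)
{
    __m256d a  = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
    __m256d b  = _mm256_setr_pd(10.0, 11.0, 12.0, 13.0);
    __m256d lo = _mm256_unpacklo_pd(a, b);   /* {a0,b0,a2,b2} = {0,10,2,12} */
    __m256d hi = _mm256_unpackhi_pd(a, b);   /* {a1,b1,a3,b3} = {1,11,3,13} */
    (void)lo; (void)hi;
}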
2549 /// \param __a
2555 _mm_testz_pd(__m128d __a, __m128d __b)
2557 return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
2578 /// \param __a
2584 _mm_testc_pd(__m128d __a, __m128d __b)
2586 return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
2608 /// \param __a
2614 _mm_testnzc_pd(__m128d __a, __m128d __b)
2616 return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
2637 /// \param __a
2643 _mm_testz_ps(__m128 __a, __m128 __b)
2645 return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
2666 /// \param __a
2672 _mm_testc_ps(__m128 __a, __m128 __b)
2674 return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
2696 /// \param __a
2702 _mm_testnzc_ps(__m128 __a, __m128 __b)
2704 return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
2725 /// \param __a
2731 _mm256_testz_pd(__m256d __a, __m256d __b)
2733 return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
2754 /// \param __a
2760 _mm256_testc_pd(__m256d __a, __m256d __b)
2762 return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
2784 /// \param __a
2790 _mm256_testnzc_pd(__m256d __a, __m256d __b)
2792 return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
2813 /// \param __a
2819 _mm256_testz_ps(__m256 __a, __m256 __b)
2821 return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
2842 /// \param __a
2848 _mm256_testc_ps(__m256 __a, __m256 __b)
2850 return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
2872 /// \param __a
2878 _mm256_testnzc_ps(__m256 __a, __m256 __b)
2880 return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
2898 /// \param __a
2904 _mm256_testz_si256(__m256i __a, __m256i __b)
2906 return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
2924 /// \param __a
2930 _mm256_testc_si256(__m256i __a, __m256i __b)
2932 return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
2951 /// \param __a
2957 _mm256_testnzc_si256(__m256i __a, __m256i __b)
2959 return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
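The test intrinsics set ZF from (a AND b) and CF from (NOT a AND b) and return one of those flags; the pd/ps variants look only at the sign bits. A sketch of the most common uses (helper names illustrative):

#include <immintrin.h>

/* Nonzero iff every bit of v is zero. */
static int is_all_zero(__m256i v)
{
    return _mm256_testz_si256(v, v);
}

/* Nonzero iff no element of v has its sign bit set. */
static int none_negative(__m256d v)
{
    return _mm256_testz_pd(v, v);
}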
2971 /// \param __a
2976 _mm256_movemask_pd(__m256d __a)
2978 return __builtin_ia32_movmskpd256((__v4df)__a);
2989 /// \param __a
2994 _mm256_movemask_ps(__m256 __a)
2996 return __builtin_ia32_movmskps256((__v8sf)__a);
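movemask packs the sign bits into the low bits of an int, which composes naturally with compares and bit counting. A sketch; __builtin_popcount is a GCC/Clang builtin assumed here, and the helper name is illustrative:

#include <immintrin.h>

/* Count how many of the four doubles in v have their sign bit set. */
static int count_negative(__m256d v)
{
    int mask = _mm256_movemask_pd(v);   /* bit i = sign bit of element i */
    return __builtin_popcount(mask);
}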
3024 /// specified address pointed to by \a __a and broadcasts it to the elements
3031 /// \param __a
3036 _mm_broadcast_ss(float const *__a)
3041 float __f = ((const struct __mm_broadcast_ss_struct*)__a)->__f;
3046 /// specified address pointed to by \a __a and broadcasts it to the elements
3053 /// \param __a
3058 _mm256_broadcast_sd(double const *__a)
3063 double __d = ((const struct __mm256_broadcast_sd_struct*)__a)->__d;
3068 /// specified address pointed to by \a __a and broadcasts it to the elements
3075 /// \param __a
3080 _mm256_broadcast_ss(float const *__a)
3085 float __f = ((const struct __mm256_broadcast_ss_struct*)__a)->__f;
3090 /// specified address pointed to by \a __a and broadcasts it to 128-bit
3097 /// \param __a
3102 _mm256_broadcast_pd(__m128d const *__a)
3104 __m128d __b = _mm_loadu_pd((const double *)__a);
3110 /// specified address pointed to by \a __a and broadcasts it to 128-bit
3117 /// \param __a
3122 _mm256_broadcast_ps(__m128 const *__a)
3124 __m128 __b = _mm_loadu_ps((const float *)__a);
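A sketch of the load-and-broadcast forms; as the unaligned loads in the bodies above suggest, the source pointers do not need to be 32-byte aligned (helper name illustrative):

#include <immintrin.h>

static void broadcast_demo(const float *scalar, const __m128d *pair)
{
    __m256  all_same = _mm256_broadcast_ss(scalar);  /* one float -> 8 copies       */
    __m256d per_lane = _mm256_broadcast_pd(pair);    /* 128-bit value -> both lanes */
    (void)all_same; (void)per_lane;
}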
3269 /// \param __a
3272 _mm256_store_pd(double *__p, __m256d __a)
3274 *(__m256d *)__p = __a;
3287 /// \param __a
3290 _mm256_store_ps(float *__p, __m256 __a)
3292 *(__m256 *)__p = __a;
3305 /// \param __a
3308 _mm256_storeu_pd(double *__p, __m256d __a)
3313 ((struct __storeu_pd*)__p)->__v = __a;
3325 /// \param __a
3328 _mm256_storeu_ps(float *__p, __m256 __a)
3333 ((struct __storeu_ps*)__p)->__v = __a;
3346 /// \param __a
3349 _mm256_store_si256(__m256i *__p, __m256i __a)
3351 *__p = __a;
3363 /// \param __a
3366 _mm256_storeu_si256(__m256i_u *__p, __m256i __a)
3371 ((struct __storeu_si256*)__p)->__v = __a;
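The _store_ forms require 32-byte alignment while the _storeu_ forms do not. A sketch; the aligned-attribute syntax is the GCC/Clang spelling (C11 alignas would also work), and the helper name is illustrative:

#include <immintrin.h>

static void store_demo(double *unaligned_out)
{
    double aligned_out[4] __attribute__((aligned(32)));

    __m256d v = _mm256_set1_pd(1.5);
    _mm256_store_pd(aligned_out, v);      /* requires 32-byte alignment */
    _mm256_storeu_pd(unaligned_out, v);   /* no alignment requirement   */
    (void)aligned_out;
}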
3487 /// \a __a is not stored and the corresponding field in the memory location
3489 /// \param __a
3492 _mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)
3494 __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);
3510 /// zero, the corresponding value from vector \a __a is not stored and the
3513 /// \param __a
3516 _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)
3518 __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);
3535 /// __a is not stored and the corresponding field in the memory location
3537 /// \param __a
3540 _mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)
3542 __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);
3558 /// zero, the corresponding value from vector __a is not stored and the
3561 /// \param __a
3564 _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
3566 __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);
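maskstore writes only the elements whose mask sign bit is set and leaves the other memory locations untouched, which makes it useful for loop tails. A sketch; the mask construction and helper name are illustrative, and _mm256_setr_epi64x is assumed from the same header:

#include <immintrin.h>

/* Store only the first n (0..4) doubles of v to out. */
static void store_first_n(double *out, __m256d v, int n)
{
    __m256i mask = _mm256_setr_epi64x(n > 0 ? -1 : 0, n > 1 ? -1 : 0,
                                      n > 2 ? -1 : 0, n > 3 ? -1 : 0);
    _mm256_maskstore_pd(out, mask, v);
}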
3578 /// \param __a
3584 _mm256_stream_si256(void *__a, __m256i __b)
3587 __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
3598 /// \param __a
3604 _mm256_stream_pd(void *__a, __m256d __b)
3607 __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
3622 /// \param __a
3625 _mm256_stream_ps(void *__p, __m256 __a)
3628 __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
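The stream forms are non-temporal stores that bypass the cache; the destination must be 32-byte aligned, and a store fence is the usual way to order them before the data is read by another agent. A sketch; _mm_sfence and _mm256_set1_pd are assumed from the wider intrinsics headers, and the helper name is illustrative:

#include <immintrin.h>
#include <stddef.h>

/* dst must be 32-byte aligned; count is assumed to be a multiple of 4 here. */
static void stream_fill(double *dst, double x, size_t count)
{
    __m256d v = _mm256_set1_pd(x);
    for (size_t i = 0; i + 4 <= count; i += 4)
        _mm256_stream_pd(dst + i, v);   /* non-temporal store */
    _mm_sfence();                       /* make the stores globally visible in order */
}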
3679 /// \param __a
3693 _mm256_set_pd(double __a, double __b, double __c, double __d)
3695 return __extension__ (__m256d){ __d, __c, __b, __a };
3706 /// \param __a
3732 _mm256_set_ps(float __a, float __b, float __c, float __d,
3735 return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
3920 /// \param __a
3930 _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
3932 return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };
3945 /// \param __a
3959 _mm256_setr_pd(double __a, double __b, double __c, double __d)
3961 return _mm256_set_pd(__d, __c, __b, __a);
3973 /// \param __a
3999 _mm256_setr_ps(float __a, float __b, float __c, float __d,
4002 return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a);
4187 /// \param __a
4197 _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
4199 return _mm256_set_epi64x(__d, __c, __b, __a);
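The _set_ forms take their arguments from the highest-numbered element down to element 0, while the _setr_ forms take them in memory (low-to-high) order, which is why each setr body simply forwards the arguments reversed. A sketch:

#include <immintrin.h>

static void set_demo(void)
{
    /* Both vectors have element 0 == 1.0 and element 3 == 4.0. */
    __m256d a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);    /* high .. low */
    __m256d b = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);   /* low .. high */
    (void)a; (void)b;
}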
4366 /// \param __a
4371 _mm256_castpd_ps(__m256d __a)
4373 return (__m256)__a;
4383 /// \param __a
4388 _mm256_castpd_si256(__m256d __a)
4390 return (__m256i)__a;
4400 /// \param __a
4405 _mm256_castps_pd(__m256 __a)
4407 return (__m256d)__a;
4417 /// \param __a
4422 _mm256_castps_si256(__m256 __a)
4424 return (__m256i)__a;
4434 /// \param __a
4439 _mm256_castsi256_ps(__m256i __a)
4441 return (__m256)__a;
4451 /// \param __a
4456 _mm256_castsi256_pd(__m256i __a)
4458 return (__m256d)__a;
4468 /// \param __a
4473 _mm256_castpd256_pd128(__m256d __a)
4475 return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
4485 /// \param __a
4490 _mm256_castps256_ps128(__m256 __a)
4492 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
4501 /// \param __a
4506 _mm256_castsi256_si128(__m256i __a)
4508 return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);
4521 /// \param __a
4527 _mm256_castpd128_pd256(__m128d __a)
4530 (__v2df)__a, (__v2df)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
4543 /// \param __a
4549 _mm256_castps128_ps256(__m128 __a)
4551 return __builtin_shufflevector((__v4sf)__a,
4552 (__v4sf)__builtin_nondeterministic_value(__a),
4565 /// \param __a
4570 _mm256_castsi128_si256(__m128i __a)
4573 (__v2di)__a, (__v2di)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
4585 /// \param __a
4590 _mm256_zextpd128_pd256(__m128d __a)
4592 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);
4603 /// \param __a
4608 _mm256_zextps128_ps256(__m128 __a)
4610 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);
4621 /// \param __a
4626 _mm256_zextsi128_si256(__m128i __a)
4628 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);
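The cast intrinsics only reinterpret the register and generate no instructions; when widening from 128 to 256 bits the upper half is left undefined (hence the nondeterministic value above), whereas the _zext_ forms guarantee a zeroed upper half. A sketch (helper name illustrative):

#include <immintrin.h>

static void cast_demo(__m128 lo)
{
    __m256 undef_hi = _mm256_castps128_ps256(lo);       /* upper 128 bits undefined */
    __m256 zero_hi  = _mm256_zextps128_ps256(lo);       /* upper 128 bits zeroed    */
    __m128 back     = _mm256_castps256_ps128(zero_hi);  /* reinterpretation only    */
    (void)undef_hi; (void)back;
}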
5045 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5049 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5052 /// \param __a
5055 _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
5059 __v128 = _mm256_castps256_ps128(__a);
5061 __v128 = _mm256_extractf128_ps(__a, 1);
5074 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5078 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5081 /// \param __a
5084 _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
5088 __v128 = _mm256_castpd256_pd128(__a);
5090 __v128 = _mm256_extractf128_pd(__a, 1);
5103 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5107 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5110 /// \param __a
5113 _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a)
5117 __v128 = _mm256_castsi256_si128(__a);
5119 __v128 = _mm256_extractf128_si256(__a, 1);
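storeu2 splits a 256-bit value into its two 128-bit halves and stores them to two independent, possibly unaligned addresses, with the high-half pointer passed first. A sketch (helper name illustrative):

#include <immintrin.h>

static void split_store(float *lo_half, float *hi_half, __m256 v)
{
    /* Bits [127:0] of v go to lo_half, bits [255:128] to hi_half. */
    _mm256_storeu2_m128(hi_half, lo_half, v);
}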