Lines Matching defs:__a
84 /// \param __a
91 _mm256_add_pd(__m256d __a, __m256d __b)
93 return (__m256d)((__v4df)__a+(__v4df)__b);
102 /// \param __a
109 _mm256_add_ps(__m256 __a, __m256 __b)
111 return (__m256)((__v8sf)__a+(__v8sf)__b);
120 /// \param __a
127 _mm256_sub_pd(__m256d __a, __m256d __b)
129 return (__m256d)((__v4df)__a-(__v4df)__b);
138 /// \param __a
145 _mm256_sub_ps(__m256 __a, __m256 __b)
147 return (__m256)((__v8sf)__a-(__v8sf)__b);
157 /// \param __a
164 _mm256_addsub_pd(__m256d __a, __m256d __b)
166 return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
176 /// \param __a
183 _mm256_addsub_ps(__m256 __a, __m256 __b)
185 return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
194 /// \param __a
201 _mm256_div_pd(__m256d __a, __m256d __b)
203 return (__m256d)((__v4df)__a/(__v4df)__b);
212 /// \param __a
219 _mm256_div_ps(__m256 __a, __m256 __b)
221 return (__m256)((__v8sf)__a/(__v8sf)__b);
233 /// \param __a
240 _mm256_max_pd(__m256d __a, __m256d __b)
242 return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
254 /// \param __a
261 _mm256_max_ps(__m256 __a, __m256 __b)
263 return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
275 /// \param __a
282 _mm256_min_pd(__m256d __a, __m256d __b)
284 return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
296 /// \param __a
303 _mm256_min_ps(__m256 __a, __m256 __b)
305 return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
314 /// \param __a
321 _mm256_mul_pd(__m256d __a, __m256d __b)
323 return (__m256d)((__v4df)__a * (__v4df)__b);
332 /// \param __a
339 _mm256_mul_ps(__m256 __a, __m256 __b)
341 return (__m256)((__v8sf)__a * (__v8sf)__b);
351 /// \param __a
356 _mm256_sqrt_pd(__m256d __a)
358 return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
368 /// \param __a
373 _mm256_sqrt_ps(__m256 __a)
375 return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
385 /// \param __a
390 _mm256_rsqrt_ps(__m256 __a)
392 return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
402 /// \param __a
407 _mm256_rcp_ps(__m256 __a)
409 return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
552 /// \param __a
559 _mm256_and_pd(__m256d __a, __m256d __b)
561 return (__m256d)((__v4du)__a & (__v4du)__b);
570 /// \param __a
577 _mm256_and_ps(__m256 __a, __m256 __b)
579 return (__m256)((__v8su)__a & (__v8su)__b);
589 /// \param __a
598 _mm256_andnot_pd(__m256d __a, __m256d __b)
600 return (__m256d)(~(__v4du)__a & (__v4du)__b);
610 /// \param __a
619 _mm256_andnot_ps(__m256 __a, __m256 __b)
621 return (__m256)(~(__v8su)__a & (__v8su)__b);
630 /// \param __a
637 _mm256_or_pd(__m256d __a, __m256d __b)
639 return (__m256d)((__v4du)__a | (__v4du)__b);
648 /// \param __a
655 _mm256_or_ps(__m256 __a, __m256 __b)
657 return (__m256)((__v8su)__a | (__v8su)__b);
666 /// \param __a
673 _mm256_xor_pd(__m256d __a, __m256d __b)
675 return (__m256d)((__v4du)__a ^ (__v4du)__b);
684 /// \param __a
691 _mm256_xor_ps(__m256 __a, __m256 __b)
693 return (__m256)((__v8su)__a ^ (__v8su)__b);
704 /// \param __a
715 _mm256_hadd_pd(__m256d __a, __m256d __b)
717 return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
727 /// \param __a
738 _mm256_hadd_ps(__m256 __a, __m256 __b)
740 return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
750 /// \param __a
761 _mm256_hsub_pd(__m256d __a, __m256d __b)
763 return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
773 /// \param __a
784 _mm256_hsub_ps(__m256 __a, __m256 __b)
786 return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
797 /// \param __a
814 _mm_permutevar_pd(__m128d __a, __m128i __c)
816 return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
826 /// \param __a
853 _mm256_permutevar_pd(__m256d __a, __m256i __c)
855 return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
865 /// \param __a
908 _mm_permutevar_ps(__m128 __a, __m128i __c)
910 return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
920 /// \param __a
999 _mm256_permutevar_ps(__m256 __a, __m256i __c)
1001 return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
1409 /// \param __a
1417 /// corresponding 64-bit element in operand \a __a is copied to the same
1423 _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
1426 (__v4df)__a, (__v4df)__b, (__v4df)__c);
1437 /// \param __a
1445 /// a mask bit is 0, the corresponding 32-bit element in operand \a __a is
1451 _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
1454 (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
2190 /// \param __a
2194 _mm256_cvtepi32_pd(__m128i __a)
2196 return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
2205 /// \param __a
2209 _mm256_cvtepi32_ps(__m256i __a)
2211 return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);
2221 /// \param __a
2225 _mm256_cvtpd_ps(__m256d __a)
2227 return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
2240 /// \param __a
2244 _mm256_cvtps_epi32(__m256 __a)
2246 return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
2256 /// \param __a
2260 _mm256_cvtps_pd(__m128 __a)
2262 return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
2277 /// \param __a
2281 _mm256_cvttpd_epi32(__m256d __a)
2283 return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
2297 /// \param __a
2301 _mm256_cvtpd_epi32(__m256d __a)
2303 return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
2317 /// \param __a
2321 _mm256_cvttps_epi32(__m256 __a)
2323 return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
2333 /// \param __a
2337 _mm256_cvtsd_f64(__m256d __a)
2339 return __a[0];
2349 /// \param __a
2353 _mm256_cvtsi256_si32(__m256i __a)
2355 __v8si __b = (__v8si)__a;
2366 /// \param __a
2370 _mm256_cvtss_f32(__m256 __a)
2372 return __a[0];
2383 /// \param __a
2385 /// Bits [255:224] of \a __a are written to bits [255:224] and [223:192] of
2387 /// Bits [191:160] of \a __a are written to bits [191:160] and [159:128] of
2389 /// Bits [127:96] of \a __a are written to bits [127:96] and [95:64] of the
2391 /// Bits [63:32] of \a __a are written to bits [63:32] and [31:0] of the
2396 _mm256_movehdup_ps(__m256 __a)
2398 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
2408 /// \param __a
2410 /// Bits [223:192] of \a __a are written to bits [255:224] and [223:192] of
2412 /// Bits [159:128] of \a __a are written to bits [191:160] and [159:128] of
2414 /// Bits [95:64] of \a __a are written to bits [127:96] and [95:64] of the
2416 /// Bits [31:0] of \a __a are written to bits [63:32] and [31:0] of the
2421 _mm256_moveldup_ps(__m256 __a)
2423 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
2434 /// \param __a
2436 /// Bits [63:0] of \a __a are written to bits [127:64] and [63:0] of the
2438 /// Bits [191:128] of \a __a are written to bits [255:192] and [191:128] of
2443 _mm256_movedup_pd(__m256d __a)
2445 return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);
2456 /// \param __a
2466 _mm256_unpackhi_pd(__m256d __a, __m256d __b)
2468 return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);
2478 /// \param __a
2488 _mm256_unpacklo_pd(__m256d __a, __m256d __b)
2490 return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);
2501 /// \param __a
2515 _mm256_unpackhi_ps(__m256 __a, __m256 __b)
2517 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
2528 /// \param __a
2542 _mm256_unpacklo_ps(__m256 __a, __m256 __b)
2544 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
2566 /// \param __a
2572 _mm_testz_pd(__m128d __a, __m128d __b)
2574 return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
2595 /// \param __a
2601 _mm_testc_pd(__m128d __a, __m128d __b)
2603 return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
2625 /// \param __a
2631 _mm_testnzc_pd(__m128d __a, __m128d __b)
2633 return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
2654 /// \param __a
2660 _mm_testz_ps(__m128 __a, __m128 __b)
2662 return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
2683 /// \param __a
2689 _mm_testc_ps(__m128 __a, __m128 __b)
2691 return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
2713 /// \param __a
2719 _mm_testnzc_ps(__m128 __a, __m128 __b)
2721 return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
2742 /// \param __a
2748 _mm256_testz_pd(__m256d __a, __m256d __b)
2750 return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
2771 /// \param __a
2777 _mm256_testc_pd(__m256d __a, __m256d __b)
2779 return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
2801 /// \param __a
2807 _mm256_testnzc_pd(__m256d __a, __m256d __b)
2809 return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
2830 /// \param __a
2836 _mm256_testz_ps(__m256 __a, __m256 __b)
2838 return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
2859 /// \param __a
2865 _mm256_testc_ps(__m256 __a, __m256 __b)
2867 return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
2889 /// \param __a
2895 _mm256_testnzc_ps(__m256 __a, __m256 __b)
2897 return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
2915 /// \param __a
2921 _mm256_testz_si256(__m256i __a, __m256i __b)
2923 return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
2941 /// \param __a
2947 _mm256_testc_si256(__m256i __a, __m256i __b)
2949 return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
2968 /// \param __a
2974 _mm256_testnzc_si256(__m256i __a, __m256i __b)
2976 return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
2988 /// \param __a
2993 _mm256_movemask_pd(__m256d __a)
2995 return __builtin_ia32_movmskpd256((__v4df)__a);
3006 /// \param __a
3011 _mm256_movemask_ps(__m256 __a)
3013 return __builtin_ia32_movmskps256((__v8sf)__a);
3041 /// specified address pointed to by \a __a and broadcasts it to the elements
3048 /// \param __a
3053 _mm_broadcast_ss(float const *__a)
3058 float __f = ((const struct __mm_broadcast_ss_struct*)__a)->__f;
3063 /// specified address pointed to by \a __a and broadcasts it to the elements
3070 /// \param __a
3075 _mm256_broadcast_sd(double const *__a)
3080 double __d = ((const struct __mm256_broadcast_sd_struct*)__a)->__d;
3085 /// specified address pointed to by \a __a and broadcasts it to the elements
3092 /// \param __a
3097 _mm256_broadcast_ss(float const *__a)
3102 float __f = ((const struct __mm256_broadcast_ss_struct*)__a)->__f;
3107 /// specified address pointed to by \a __a and broadcasts it to 128-bit
3114 /// \param __a
3119 _mm256_broadcast_pd(__m128d const *__a)
3121 __m128d __b = _mm_loadu_pd((const double *)__a);
3127 /// specified address pointed to by \a __a and broadcasts it to 128-bit
3134 /// \param __a
3139 _mm256_broadcast_ps(__m128 const *__a)
3141 __m128 __b = _mm_loadu_ps((const float *)__a);
3286 /// \param __a
3289 _mm256_store_pd(double *__p, __m256d __a)
3291 *(__m256d *)__p = __a;
3304 /// \param __a
3307 _mm256_store_ps(float *__p, __m256 __a)
3309 *(__m256 *)__p = __a;
3322 /// \param __a
3325 _mm256_storeu_pd(double *__p, __m256d __a)
3330 ((struct __storeu_pd*)__p)->__v = __a;
3342 /// \param __a
3345 _mm256_storeu_ps(float *__p, __m256 __a)
3350 ((struct __storeu_ps*)__p)->__v = __a;
3363 /// \param __a
3366 _mm256_store_si256(__m256i *__p, __m256i __a)
3368 *__p = __a;
3380 /// \param __a
3383 _mm256_storeu_si256(__m256i_u *__p, __m256i __a)
3388 ((struct __storeu_si256*)__p)->__v = __a;
3504 /// \a __a is not stored and the corresponding field in the memory location
3506 /// \param __a
3509 _mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)
3511 __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);
3527 /// zero, the corresponding value from vector \a __a is not stored and the
3530 /// \param __a
3533 _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)
3535 __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);
3552 /// __a is not stored and the corresponding field in the memory location
3554 /// \param __a
3557 _mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)
3559 __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);
3575 /// zero, the corresponding value from vector __a is not stored and the
3578 /// \param __a
3581 _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
3583 __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);
3595 /// \param __a
3601 _mm256_stream_si256(void *__a, __m256i __b)
3604 __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
3615 /// \param __a
3621 _mm256_stream_pd(void *__a, __m256d __b)
3624 __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
3639 /// \param __a
3642 _mm256_stream_ps(void *__p, __m256 __a)
3645 __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
3696 /// \param __a
3710 _mm256_set_pd(double __a, double __b, double __c, double __d)
3712 return __extension__ (__m256d){ __d, __c, __b, __a };
3723 /// \param __a
3749 _mm256_set_ps(float __a, float __b, float __c, float __d,
3752 return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
3937 /// \param __a
3947 _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
3949 return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };
3962 /// \param __a
3976 _mm256_setr_pd(double __a, double __b, double __c, double __d)
3978 return _mm256_set_pd(__d, __c, __b, __a);
3990 /// \param __a
4016 _mm256_setr_ps(float __a, float __b, float __c, float __d,
4019 return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a);
4204 /// \param __a
4214 _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
4216 return _mm256_set_epi64x(__d, __c, __b, __a);
4378 /// \param __a
4383 _mm256_castpd_ps(__m256d __a)
4385 return (__m256)__a;
4395 /// \param __a
4400 _mm256_castpd_si256(__m256d __a)
4402 return (__m256i)__a;
4412 /// \param __a
4417 _mm256_castps_pd(__m256 __a)
4419 return (__m256d)__a;
4429 /// \param __a
4434 _mm256_castps_si256(__m256 __a)
4436 return (__m256i)__a;
4446 /// \param __a
4451 _mm256_castsi256_ps(__m256i __a)
4453 return (__m256)__a;
4463 /// \param __a
4468 _mm256_castsi256_pd(__m256i __a)
4470 return (__m256d)__a;
4480 /// \param __a
4485 _mm256_castpd256_pd128(__m256d __a)
4487 return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
4497 /// \param __a
4502 _mm256_castps256_ps128(__m256 __a)
4504 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
4513 /// \param __a
4518 _mm256_castsi256_si128(__m256i __a)
4520 return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);
4533 /// \param __a
4539 _mm256_castpd128_pd256(__m128d __a)
4542 (__v2df)__a, (__v2df)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
4555 /// \param __a
4561 _mm256_castps128_ps256(__m128 __a)
4563 return __builtin_shufflevector((__v4sf)__a,
4564 (__v4sf)__builtin_nondeterministic_value(__a),
4577 /// \param __a
4582 _mm256_castsi128_si256(__m128i __a)
4585 (__v2di)__a, (__v2di)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
4597 /// \param __a
4602 _mm256_zextpd128_pd256(__m128d __a)
4604 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);
4615 /// \param __a
4620 _mm256_zextps128_ps256(__m128 __a)
4622 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);
4633 /// \param __a
4638 _mm256_zextsi128_si256(__m128i __a)
4640 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);
5057 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5061 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5064 /// \param __a
5067 _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
5071 __v128 = _mm256_castps256_ps128(__a);
5073 __v128 = _mm256_extractf128_ps(__a, 1);
5086 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5090 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5093 /// \param __a
5096 _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
5100 __v128 = _mm256_castpd256_pd128(__a);
5102 __v128 = _mm256_extractf128_pd(__a, 1);
5115 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5119 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5122 /// \param __a
5125 _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a)
5129 __v128 = _mm256_castsi256_si128(__a);
5131 __v128 = _mm256_extractf128_si256(__a, 1);