Lines Matching defs:__a
67 /// \param __a
74 _mm256_add_pd(__m256d __a, __m256d __b)
76 return (__m256d)((__v4df)__a+(__v4df)__b);
85 /// \param __a
92 _mm256_add_ps(__m256 __a, __m256 __b)
94 return (__m256)((__v8sf)__a+(__v8sf)__b);
103 /// \param __a
110 _mm256_sub_pd(__m256d __a, __m256d __b)
112 return (__m256d)((__v4df)__a-(__v4df)__b);
121 /// \param __a
128 _mm256_sub_ps(__m256 __a, __m256 __b)
130 return (__m256)((__v8sf)__a-(__v8sf)__b);
140 /// \param __a
147 _mm256_addsub_pd(__m256d __a, __m256d __b)
149 return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
159 /// \param __a
166 _mm256_addsub_ps(__m256 __a, __m256 __b)
168 return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
177 /// \param __a
184 _mm256_div_pd(__m256d __a, __m256d __b)
186 return (__m256d)((__v4df)__a/(__v4df)__b);
195 /// \param __a
202 _mm256_div_ps(__m256 __a, __m256 __b)
204 return (__m256)((__v8sf)__a/(__v8sf)__b);
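The add/sub/addsub/div intrinsics above are straight element-wise operations (addsub subtracts in the even-indexed positions and adds in the odd-indexed ones). A minimal usage sketch, assuming a compiler with AVX enabled (e.g. -mavx); the helper name is illustrative:

#include <immintrin.h>

/* Element-wise arithmetic on four packed doubles. */
static __m256d add_sub_div_demo(__m256d a, __m256d b)
{
    __m256d sum  = _mm256_add_pd(a, b);      /* a[i] + b[i]                  */
    __m256d diff = _mm256_sub_pd(a, b);      /* a[i] - b[i]                  */
    __m256d asub = _mm256_addsub_pd(a, b);   /* {a0-b0, a1+b1, a2-b2, a3+b3} */
    (void)asub;
    return _mm256_div_pd(sum, diff);         /* (a[i]+b[i]) / (a[i]-b[i])    */
}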
216 /// \param __a
223 _mm256_max_pd(__m256d __a, __m256d __b)
225 return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
237 /// \param __a
244 _mm256_max_ps(__m256 __a, __m256 __b)
246 return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
258 /// \param __a
265 _mm256_min_pd(__m256d __a, __m256d __b)
267 return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
279 /// \param __a
286 _mm256_min_ps(__m256 __a, __m256 __b)
288 return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
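A common use of the max/min pair is clamping each element to a range; a minimal sketch (the helper name is illustrative):

#include <immintrin.h>

/* Clamp each of the eight floats in v to [lo, hi]. */
static __m256 clamp_ps(__m256 v, __m256 lo, __m256 hi)
{
    return _mm256_min_ps(_mm256_max_ps(v, lo), hi);
}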
297 /// \param __a
304 _mm256_mul_pd(__m256d __a, __m256d __b)
306 return (__m256d)((__v4df)__a * (__v4df)__b);
315 /// \param __a
322 _mm256_mul_ps(__m256 __a, __m256 __b)
324 return (__m256)((__v8sf)__a * (__v8sf)__b);
334 /// \param __a
339 _mm256_sqrt_pd(__m256d __a)
341 return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
351 /// \param __a
356 _mm256_sqrt_ps(__m256 __a)
358 return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
368 /// \param __a
373 _mm256_rsqrt_ps(__m256 __a)
375 return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
385 /// \param __a
390 _mm256_rcp_ps(__m256 __a)
392 return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
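_mm256_sqrt_ps/pd are correctly rounded, while _mm256_rsqrt_ps and _mm256_rcp_ps are fast approximations (about 12 bits of relative precision). A hedged sketch of the usual Newton-Raphson refinement of the reciprocal square root; _mm256_set1_ps comes from the same header family but is not part of this listing, and the helper name is illustrative:

#include <immintrin.h>

/* One Newton-Raphson step on top of the rsqrt estimate: y' = 0.5*y*(3 - x*y*y). */
static __m256 rsqrt_refined_ps(__m256 x)
{
    __m256 y     = _mm256_rsqrt_ps(x);                        /* ~12-bit estimate */
    __m256 xyy   = _mm256_mul_ps(x, _mm256_mul_ps(y, y));
    __m256 half  = _mm256_set1_ps(0.5f);
    __m256 three = _mm256_set1_ps(3.0f);
    return _mm256_mul_ps(_mm256_mul_ps(half, y),
                         _mm256_sub_ps(three, xyy));
}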
535 /// \param __a
542 _mm256_and_pd(__m256d __a, __m256d __b)
544 return (__m256d)((__v4du)__a & (__v4du)__b);
553 /// \param __a
560 _mm256_and_ps(__m256 __a, __m256 __b)
562 return (__m256)((__v8su)__a & (__v8su)__b);
572 /// \param __a
581 _mm256_andnot_pd(__m256d __a, __m256d __b)
583 return (__m256d)(~(__v4du)__a & (__v4du)__b);
593 /// \param __a
602 _mm256_andnot_ps(__m256 __a, __m256 __b)
604 return (__m256)(~(__v8su)__a & (__v8su)__b);
613 /// \param __a
620 _mm256_or_pd(__m256d __a, __m256d __b)
622 return (__m256d)((__v4du)__a | (__v4du)__b);
631 /// \param __a
638 _mm256_or_ps(__m256 __a, __m256 __b)
640 return (__m256)((__v8su)__a | (__v8su)__b);
649 /// \param __a
656 _mm256_xor_pd(__m256d __a, __m256d __b)
658 return (__m256d)((__v4du)__a ^ (__v4du)__b);
667 /// \param __a
674 _mm256_xor_ps(__m256 __a, __m256 __b)
676 return (__m256)((__v8su)__a ^ (__v8su)__b);
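Since these work on raw bit patterns, a typical use is sign-bit manipulation. A minimal sketch; the helper names are illustrative and _mm256_set1_pd is assumed from the same header:

#include <immintrin.h>

/* |v|: clear each sign bit. andnot computes ~first & second, so the mask goes first. */
static __m256d abs_pd(__m256d v)
{
    return _mm256_andnot_pd(_mm256_set1_pd(-0.0), v);
}

/* -v: flip each sign bit. */
static __m256d neg_pd(__m256d v)
{
    return _mm256_xor_pd(v, _mm256_set1_pd(-0.0));
}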
687 /// \param __a
698 _mm256_hadd_pd(__m256d __a, __m256d __b)
700 return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
710 /// \param __a
721 _mm256_hadd_ps(__m256 __a, __m256 __b)
723 return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
733 /// \param __a
744 _mm256_hsub_pd(__m256d __a, __m256d __b)
746 return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
756 /// \param __a
767 _mm256_hsub_ps(__m256 __a, __m256 __b)
769 return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
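The horizontal forms operate within each 128-bit lane rather than across the whole vector; a sketch showing the resulting order (helper name illustrative, _mm256_set_pd assumed from the same header):

#include <immintrin.h>

static __m256d hadd_demo(void)
{
    __m256d a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);   /* elements 0..3 = {1,2,3,4} */
    __m256d b = _mm256_set_pd(8.0, 7.0, 6.0, 5.0);   /* elements 0..3 = {5,6,7,8} */
    /* Per lane: {a0+a1, b0+b1, a2+a3, b2+b3} = {3, 11, 7, 15} */
    return _mm256_hadd_pd(a, b);
}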
780 /// \param __a
797 _mm_permutevar_pd(__m128d __a, __m128i __c)
799 return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
809 /// \param __a
836 _mm256_permutevar_pd(__m256d __a, __m256i __c)
838 return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
848 /// \param __a
891 _mm_permutevar_ps(__m128 __a, __m128i __c)
893 return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
903 /// \param __a
982 _mm256_permutevar_ps(__m256 __a, __m256i __c)
984 return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
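The permutevar forms take their selectors from a vector at run time, two bits per float, and shuffle within each 128-bit lane. A sketch (helper name illustrative; _mm256_setr_epi32 is assumed from the same header family):

#include <immintrin.h>

/* Reverse the four floats inside each 128-bit lane. */
static __m256 reverse_within_lanes(__m256 v)
{
    __m256i ctrl = _mm256_setr_epi32(3, 2, 1, 0, 3, 2, 1, 0);
    return _mm256_permutevar_ps(v, ctrl);
}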
1392 /// \param __a
1400 /// corresponding 64-bit element in operand \a __a is copied to the same
1406 _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
1409 (__v4df)__a, (__v4df)__b, (__v4df)__c);
1420 /// \param __a
1428 /// a mask bit is 0, the corresponding 32-bit element in operand \a __a is
1434 _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
1437 (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
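Only the sign bit of each mask element matters, so a compare result can drive the blend directly. A sketch (helper name illustrative; _mm256_cmp_pd and _CMP_LT_OQ are assumed from the same header):

#include <immintrin.h>

/* Element-wise maximum written as compare + blend. */
static __m256d select_larger(__m256d a, __m256d b)
{
    __m256d mask = _mm256_cmp_pd(a, b, _CMP_LT_OQ);  /* all-ones where a < b      */
    return _mm256_blendv_pd(a, b, mask);             /* mask set: take b; else a  */
}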
2173 /// \param __a
2177 _mm256_cvtepi32_pd(__m128i __a)
2179 return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
2188 /// \param __a
2192 _mm256_cvtepi32_ps(__m256i __a)
2194 return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);
2204 /// \param __a
2208 _mm256_cvtpd_ps(__m256d __a)
2210 return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
2223 /// \param __a
2227 _mm256_cvtps_epi32(__m256 __a)
2229 return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
2239 /// \param __a
2243 _mm256_cvtps_pd(__m128 __a)
2245 return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
2260 /// \param __a
2264 _mm256_cvttpd_epi32(__m256d __a)
2266 return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
2280 /// \param __a
2284 _mm256_cvtpd_epi32(__m256d __a)
2286 return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
2300 /// \param __a
2304 _mm256_cvttps_epi32(__m256 __a)
2306 return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
2316 /// \param __a
2320 _mm256_cvtsd_f64(__m256d __a)
2322 return __a[0];
2332 /// \param __a
2336 _mm256_cvtsi256_si32(__m256i __a)
2338 __v8si __b = (__v8si)__a;
2349 /// \param __a
2353 _mm256_cvtss_f32(__m256 __a)
2355 return __a[0];
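A sketch that exercises several of the conversions above; the truncating (cvtt) and rounding (cvt) variants are noted in the comments, and _mm_setr_epi32 is assumed from the SSE2 header:

#include <immintrin.h>

static void convert_demo(void)
{
    __m128i i32   = _mm_setr_epi32(1, 2, 3, 4);
    __m256d d     = _mm256_cvtepi32_pd(i32);     /* widen 4 x int32 -> 4 x double  */
    __m128  s     = _mm256_cvtpd_ps(d);          /* narrow 4 x double -> 4 x float */
    __m128i trunc = _mm256_cvttpd_epi32(d);      /* truncate toward zero           */
    __m128i round = _mm256_cvtpd_epi32(d);       /* round using the current mode   */
    double  first = _mm256_cvtsd_f64(d);         /* extract element 0              */
    (void)s; (void)trunc; (void)round; (void)first;
}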
2366 /// \param __a
2368 /// Bits [255:224] of \a __a are written to bits [255:224] and [223:192] of
2370 /// Bits [191:160] of \a __a are written to bits [191:160] and [159:128] of
2372 /// Bits [127:96] of \a __a are written to bits [127:96] and [95:64] of the
2374 /// Bits [63:32] of \a __a are written to bits [63:32] and [31:0] of the
2379 _mm256_movehdup_ps(__m256 __a)
2381 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
2391 /// \param __a
2393 /// Bits [223:192] of \a __a are written to bits [255:224] and [223:192] of
2395 /// Bits [159:128] of \a __a are written to bits [191:160] and [159:128] of
2397 /// Bits [95:64] of \a __a are written to bits [127:96] and [95:64] of the
2399 /// Bits [31:0] of \a __a are written to bits [63:32] and [31:0] of the
2404 _mm256_moveldup_ps(__m256 __a)
2406 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
2417 /// \param __a
2419 /// Bits [63:0] of \a __a are written to bits [127:64] and [63:0] of the
2421 /// Bits [191:128] of \a __a are written to bits [255:192] and [191:128] of
2426 _mm256_movedup_pd(__m256d __a)
2428 return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);
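A sketch showing which elements each duplicate form repeats (_mm256_setr_ps and _mm256_setr_pd assumed from the same header; helper name illustrative):

#include <immintrin.h>

static void dup_demo(void)
{
    __m256 v    = _mm256_setr_ps(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
    __m256 odd  = _mm256_movehdup_ps(v);   /* {1,1,3,3,5,5,7,7} */
    __m256 even = _mm256_moveldup_ps(v);   /* {0,0,2,2,4,4,6,6} */

    __m256d d  = _mm256_setr_pd(10.0, 11.0, 12.0, 13.0);
    __m256d lo = _mm256_movedup_pd(d);     /* {10,10,12,12} */
    (void)odd; (void)even; (void)lo;
}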
2439 /// \param __a
2449 _mm256_unpackhi_pd(__m256d __a, __m256d __b)
2451 return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);
2461 /// \param __a
2471 _mm256_unpacklo_pd(__m256d __a, __m256d __b)
2473 return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);
2484 /// \param __a
2498 _mm256_unpackhi_ps(__m256 __a, __m256 __b)
2500 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
2511 /// \param __a
2525 _mm256_unpacklo_ps(__m256 __a, __m256 __b)
2527 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
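As with the horizontal adds, unpack interleaves within each 128-bit lane. A sketch of the element order (helper name illustrative):

#include <immintrin.h>

static void unpack_demo(void)
{
    __m256d a  = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
    __m256d b  = _mm256_setr_pd(10.0, 11.0, 12.0, 13.0);
    __m256d lo = _mm256_unpacklo_pd(a, b);   /* {a0,b0,a2,b2} = {0,10,2,12} */
    __m256d hi = _mm256_unpackhi_pd(a, b);   /* {a1,b1,a3,b3} = {1,11,3,13} */
    (void)lo; (void)hi;
}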
2549 /// \param __a
2555 _mm_testz_pd(__m128d __a, __m128d __b)
2557 return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
2578 /// \param __a
2584 _mm_testc_pd(__m128d __a, __m128d __b)
2586 return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
2608 /// \param __a
2614 _mm_testnzc_pd(__m128d __a, __m128d __b)
2616 return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
2637 /// \param __a
2643 _mm_testz_ps(__m128 __a, __m128 __b)
2645 return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
2666 /// \param __a
2672 _mm_testc_ps(__m128 __a, __m128 __b)
2674 return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
2696 /// \param __a
2702 _mm_testnzc_ps(__m128 __a, __m128 __b)
2704 return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
2725 /// \param __a
2731 _mm256_testz_pd(__m256d __a, __m256d __b)
2733 return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
2754 /// \param __a
2760 _mm256_testc_pd(__m256d __a, __m256d __b)
2762 return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
2784 /// \param __a
2790 _mm256_testnzc_pd(__m256d __a, __m256d __b)
2792 return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
2813 /// \param __a
2819 _mm256_testz_ps(__m256 __a, __m256 __b)
2821 return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
2842 /// \param __a
2848 _mm256_testc_ps(__m256 __a, __m256 __b)
2850 return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
2872 /// \param __a
2878 _mm256_testnzc_ps(__m256 __a, __m256 __b)
2880 return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
2898 /// \param __a
2904 _mm256_testz_si256(__m256i __a, __m256i __b)
2906 return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
2924 /// \param __a
2930 _mm256_testc_si256(__m256i __a, __m256i __b)
2932 return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
2951 /// \param __a
2957 _mm256_testnzc_si256(__m256i __a, __m256i __b)
2959 return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
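The test intrinsics set ZF from (a AND b) and CF from (NOT a AND b) and return one of those flags; the pd/ps variants look only at the sign bits. A sketch of the most common uses (helper names illustrative):

#include <immintrin.h>

/* Nonzero iff every bit of v is zero. */
static int is_all_zero(__m256i v)
{
    return _mm256_testz_si256(v, v);
}

/* Nonzero iff no element of v has its sign bit set. */
static int none_negative(__m256d v)
{
    return _mm256_testz_pd(v, v);
}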
2971 /// \param __a
2976 _mm256_movemask_pd(__m256d __a)
2978 return __builtin_ia32_movmskpd256((__v4df)__a);
2989 /// \param __a
2994 _mm256_movemask_ps(__m256 __a)
2996 return __builtin_ia32_movmskps256((__v8sf)__a);
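movemask packs the sign bits into the low bits of an int, which composes naturally with compares and bit counting. A sketch; __builtin_popcount is a GCC/Clang builtin assumed here, and the helper name is illustrative:

#include <immintrin.h>

/* Count how many of the four doubles in v have their sign bit set. */
static int count_negative(__m256d v)
{
    int mask = _mm256_movemask_pd(v);   /* bit i = sign bit of element i */
    return __builtin_popcount(mask);
}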
3024 /// specified address pointed to by \a __a and broadcasts it to the elements
3031 /// \param __a
3036 _mm_broadcast_ss(float const *__a)
3041 float __f = ((const struct __mm_broadcast_ss_struct*)__a)->__f;
3046 /// specified address pointed to by \a __a and broadcasts it to the elements
3053 /// \param __a
3058 _mm256_broadcast_sd(double const *__a)
3063 double __d = ((const struct __mm256_broadcast_sd_struct*)__a)->__d;
3068 /// specified address pointed to by \a __a and broadcasts it to the elements
3075 /// \param __a
3080 _mm256_broadcast_ss(float const *__a)
3085 float __f = ((const struct __mm256_broadcast_ss_struct*)__a)->__f;
3090 /// specified address pointed to by \a __a and broadcasts it to 128-bit
3097 /// \param __a
3102 _mm256_broadcast_pd(__m128d const *__a)
3104 __m128d __b = _mm_loadu_pd((const double *)__a);
3110 /// specified address pointed to by \a __a and broadcasts it to 128-bit
3117 /// \param __a
3122 _mm256_broadcast_ps(__m128 const *__a)
3124 __m128 __b = _mm_loadu_ps((const float *)__a);
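A sketch of the load-and-broadcast forms; as the unaligned loads in the bodies above suggest, the source pointers do not need to be 32-byte aligned (helper name illustrative):

#include <immintrin.h>

static void broadcast_demo(const float *scalar, const __m128d *pair)
{
    __m256  all_same = _mm256_broadcast_ss(scalar);  /* one float -> 8 copies       */
    __m256d per_lane = _mm256_broadcast_pd(pair);    /* 128-bit value -> both lanes */
    (void)all_same; (void)per_lane;
}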
3269 /// \param __a
3272 _mm256_store_pd(double *__p, __m256d __a)
3274 *(__m256d *)__p = __a;
3287 /// \param __a
3290 _mm256_store_ps(float *__p, __m256 __a)
3292 *(__m256 *)__p = __a;
3305 /// \param __a
3308 _mm256_storeu_pd(double *__p, __m256d __a)
3313 ((struct __storeu_pd*)__p)->__v = __a;
3325 /// \param __a
3328 _mm256_storeu_ps(float *__p, __m256 __a)
3333 ((struct __storeu_ps*)__p)->__v = __a;
3346 /// \param __a
3349 _mm256_store_si256(__m256i *__p, __m256i __a)
3351 *__p = __a;
3363 /// \param __a
3366 _mm256_storeu_si256(__m256i_u *__p, __m256i __a)
3371 ((struct __storeu_si256*)__p)->__v = __a;
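The _store_ forms require 32-byte alignment while the _storeu_ forms do not. A sketch; the aligned-attribute syntax is the GCC/Clang spelling (C11 alignas would also work), and the helper name is illustrative:

#include <immintrin.h>

static void store_demo(double *unaligned_out)
{
    double aligned_out[4] __attribute__((aligned(32)));

    __m256d v = _mm256_set1_pd(1.5);
    _mm256_store_pd(aligned_out, v);      /* requires 32-byte alignment */
    _mm256_storeu_pd(unaligned_out, v);   /* no alignment requirement   */
    (void)aligned_out;
}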
3487 /// \a __a is not stored and the corresponding field in the memory location
3489 /// \param __a
3492 _mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)
3494 __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);
3510 /// zero, the corresponding value from vector \a __a is not stored and the
3513 /// \param __a
3516 _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)
3518 __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);
3535 /// __a is not stored and the corresponding field in the memory location
3537 /// \param __a
3540 _mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)
3542 __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);
3558 /// zero, the corresponding value from vector __a is not stored and the
3561 /// \param __a
3564 _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
3566 __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);
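maskstore writes only the elements whose mask sign bit is set and leaves the other memory locations untouched, which makes it useful for loop tails. A sketch; the mask construction and helper name are illustrative, and _mm256_setr_epi64x is assumed from the same header:

#include <immintrin.h>

/* Store only the first n (0..4) doubles of v to out. */
static void store_first_n(double *out, __m256d v, int n)
{
    __m256i mask = _mm256_setr_epi64x(n > 0 ? -1 : 0, n > 1 ? -1 : 0,
                                      n > 2 ? -1 : 0, n > 3 ? -1 : 0);
    _mm256_maskstore_pd(out, mask, v);
}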
3578 /// \param __a
3584 _mm256_stream_si256(void *__a, __m256i __b)
3587 __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
3598 /// \param __a
3604 _mm256_stream_pd(void *__a, __m256d __b)
3607 __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
3622 /// \param __a
3625 _mm256_stream_ps(void *__p, __m256 __a)
3628 __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
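The stream forms are non-temporal stores that bypass the cache; the destination must be 32-byte aligned, and a store fence is the usual way to order them before the data is read by another agent. A sketch; _mm_sfence and _mm256_set1_pd are assumed from the wider intrinsics headers, and the helper name is illustrative:

#include <immintrin.h>
#include <stddef.h>

/* dst must be 32-byte aligned; count is assumed to be a multiple of 4 here. */
static void stream_fill(double *dst, double x, size_t count)
{
    __m256d v = _mm256_set1_pd(x);
    for (size_t i = 0; i + 4 <= count; i += 4)
        _mm256_stream_pd(dst + i, v);   /* non-temporal store */
    _mm_sfence();                       /* make the stores globally visible in order */
}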
3679 /// \param __a
3693 _mm256_set_pd(double __a, double __b, double __c, double __d)
3695 return __extension__ (__m256d){ __d, __c, __b, __a };
3706 /// \param __a
3732 _mm256_set_ps(float __a, float __b, float __c, float __d,
3735 return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
3920 /// \param __a
3930 _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
3932 return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };
3945 /// \param __a
3959 _mm256_setr_pd(double __a, double __b, double __c, double __d)
3961 return _mm256_set_pd(__d, __c, __b, __a);
3973 /// \param __a
3999 _mm256_setr_ps(float __a, float __b, float __c, float __d,
4002 return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a);
4187 /// \param __a
4197 _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
4199 return _mm256_set_epi64x(__d, __c, __b, __a);
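The _set_ forms take their arguments from the highest-numbered element down to element 0, while the _setr_ forms take them in memory (low-to-high) order, which is why each setr body simply forwards the arguments reversed. A sketch:

#include <immintrin.h>

static void set_demo(void)
{
    /* Both vectors have element 0 == 1.0 and element 3 == 4.0. */
    __m256d a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);    /* high .. low */
    __m256d b = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);   /* low .. high */
    (void)a; (void)b;
}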
4366 /// \param __a
4371 _mm256_castpd_ps(__m256d __a)
4373 return (__m256)__a;
4383 /// \param __a
4388 _mm256_castpd_si256(__m256d __a)
4390 return (__m256i)__a;
4400 /// \param __a
4405 _mm256_castps_pd(__m256 __a)
4407 return (__m256d)__a;
4417 /// \param __a
4422 _mm256_castps_si256(__m256 __a)
4424 return (__m256i)__a;
4434 /// \param __a
4439 _mm256_castsi256_ps(__m256i __a)
4441 return (__m256)__a;
4451 /// \param __a
4456 _mm256_castsi256_pd(__m256i __a)
4458 return (__m256d)__a;
4468 /// \param __a
4473 _mm256_castpd256_pd128(__m256d __a)
4475 return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
4485 /// \param __a
4490 _mm256_castps256_ps128(__m256 __a)
4492 return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
4501 /// \param __a
4506 _mm256_castsi256_si128(__m256i __a)
4508 return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);
4521 /// \param __a
4527 _mm256_castpd128_pd256(__m128d __a)
4530 (__v2df)__a, (__v2df)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
4543 /// \param __a
4549 _mm256_castps128_ps256(__m128 __a)
4551 return __builtin_shufflevector((__v4sf)__a,
4552 (__v4sf)__builtin_nondeterministic_value(__a),
4565 /// \param __a
4570 _mm256_castsi128_si256(__m128i __a)
4573 (__v2di)__a, (__v2di)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
4585 /// \param __a
4590 _mm256_zextpd128_pd256(__m128d __a)
4592 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);
4603 /// \param __a
4608 _mm256_zextps128_ps256(__m128 __a)
4610 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);
4621 /// \param __a
4626 _mm256_zextsi128_si256(__m128i __a)
4628 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);
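The cast intrinsics only reinterpret the register and generate no instructions; when widening from 128 to 256 bits the upper half is left undefined (hence the nondeterministic value above), whereas the _zext_ forms guarantee a zeroed upper half. A sketch (helper name illustrative):

#include <immintrin.h>

static void cast_demo(__m128 lo)
{
    __m256 undef_hi = _mm256_castps128_ps256(lo);       /* upper 128 bits undefined */
    __m256 zero_hi  = _mm256_zextps128_ps256(lo);       /* upper 128 bits zeroed    */
    __m128 back     = _mm256_castps256_ps128(zero_hi);  /* reinterpretation only    */
    (void)undef_hi; (void)back;
}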
5045 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5049 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5052 /// \param __a
5055 _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
5059 __v128 = _mm256_castps256_ps128(__a);
5061 __v128 = _mm256_extractf128_ps(__a, 1);
5074 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5078 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5081 /// \param __a
5084 _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
5088 __v128 = _mm256_castpd256_pd128(__a);
5090 __v128 = _mm256_extractf128_pd(__a, 1);
5103 /// A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5107 /// A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5110 /// \param __a
5113 _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a)
5117 __v128 = _mm256_castsi256_si128(__a);
5119 __v128 = _mm256_extractf128_si256(__a, 1);
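storeu2 splits a 256-bit value into its two 128-bit halves and stores them to two independent, possibly unaligned addresses, with the high-half pointer passed first. A sketch (helper name illustrative):

#include <immintrin.h>

static void split_store(float *lo_half, float *hi_half, __m256 v)
{
    /* Bits [127:0] of v go to lo_half, bits [255:128] to hi_half. */
    _mm256_storeu2_m128(hi_half, lo_half, v);
}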