Lines Matching defs:__a
77 /// \param __a
87 _mm_add_ss(__m128 __a, __m128 __b) {
88 __a[0] += __b[0];
89 return __a;
99 /// \param __a
106 _mm_add_ps(__m128 __a, __m128 __b) {
107 return (__m128)((__v4sf)__a + (__v4sf)__b);
117 /// \param __a
127 _mm_sub_ss(__m128 __a, __m128 __b) {
128 __a[0] -= __b[0];
129 return __a;
140 /// \param __a
147 _mm_sub_ps(__m128 __a, __m128 __b) {
148 return (__m128)((__v4sf)__a - (__v4sf)__b);
158 /// \param __a
168 _mm_mul_ss(__m128 __a, __m128 __b) {
169 __a[0] *= __b[0];
170 return __a;
180 /// \param __a
187 _mm_mul_ps(__m128 __a, __m128 __b) {
188 return (__m128)((__v4sf)__a * (__v4sf)__b);
198 /// \param __a
208 _mm_div_ss(__m128 __a, __m128 __b) {
209 __a[0] /= __b[0];
210 return __a;
219 /// \param __a
226 _mm_div_ps(__m128 __a, __m128 __b) {
227 return (__m128)((__v4sf)__a / (__v4sf)__b);
237 /// \param __a
243 _mm_sqrt_ss(__m128 __a)
245 return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);
255 /// \param __a
260 _mm_sqrt_ps(__m128 __a)
262 return __builtin_ia32_sqrtps((__v4sf)__a);
272 /// \param __a
278 _mm_rcp_ss(__m128 __a)
280 return (__m128)__builtin_ia32_rcpss((__v4sf)__a);
290 /// \param __a
295 _mm_rcp_ps(__m128 __a)
297 return (__m128)__builtin_ia32_rcpps((__v4sf)__a);
307 /// \param __a
314 _mm_rsqrt_ss(__m128 __a)
316 return __builtin_ia32_rsqrtss((__v4sf)__a);
326 /// \param __a
331 _mm_rsqrt_ps(__m128 __a)
333 return __builtin_ia32_rsqrtps((__v4sf)__a);
346 /// \param __a
356 _mm_min_ss(__m128 __a, __m128 __b)
358 return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);
370 /// \param __a
377 _mm_min_ps(__m128 __a, __m128 __b)
379 return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);
392 /// \param __a
402 _mm_max_ss(__m128 __a, __m128 __b)
404 return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);
416 /// \param __a
423 _mm_max_ps(__m128 __a, __m128 __b)
425 return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);
434 /// \param __a
441 _mm_and_ps(__m128 __a, __m128 __b) {
442 return (__m128)((__v4su)__a & (__v4su)__b);
453 /// \param __a
462 _mm_andnot_ps(__m128 __a, __m128 __b) {
463 return (__m128)(~(__v4su)__a & (__v4su)__b);
472 /// \param __a
479 _mm_or_ps(__m128 __a, __m128 __b) {
480 return (__m128)((__v4su)__a | (__v4su)__b);
490 /// \param __a
497 _mm_xor_ps(__m128 __a, __m128 __b) {
498 return (__m128)((__v4su)__a ^ (__v4su)__b);
512 /// \param __a
521 _mm_cmpeq_ss(__m128 __a, __m128 __b)
523 return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);
536 /// \param __a
542 _mm_cmpeq_ps(__m128 __a, __m128 __b)
544 return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);
559 /// \param __a
568 _mm_cmplt_ss(__m128 __a, __m128 __b)
570 return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);
584 /// \param __a
590 _mm_cmplt_ps(__m128 __a, __m128 __b)
592 return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);
607 /// \param __a
616 _mm_cmple_ss(__m128 __a, __m128 __b)
618 return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);
632 /// \param __a
638 _mm_cmple_ps(__m128 __a, __m128 __b)
640 return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);
655 /// \param __a
664 _mm_cmpgt_ss(__m128 __a, __m128 __b)
666 return (__m128)__builtin_shufflevector((__v4sf)__a,
667 (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),
682 /// \param __a
688 _mm_cmpgt_ps(__m128 __a, __m128 __b)
690 return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);
705 /// \param __a
714 _mm_cmpge_ss(__m128 __a, __m128 __b)
716 return (__m128)__builtin_shufflevector((__v4sf)__a,
717 (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),
732 /// \param __a
738 _mm_cmpge_ps(__m128 __a, __m128 __b)
740 return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);
755 /// \param __a
764 _mm_cmpneq_ss(__m128 __a, __m128 __b)
766 return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);
780 /// \param __a
786 _mm_cmpneq_ps(__m128 __a, __m128 __b)
788 return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);
804 /// \param __a
813 _mm_cmpnlt_ss(__m128 __a, __m128 __b)
815 return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);
830 /// \param __a
836 _mm_cmpnlt_ps(__m128 __a, __m128 __b)
838 return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);
854 /// \param __a
863 _mm_cmpnle_ss(__m128 __a, __m128 __b)
865 return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);
880 /// \param __a
886 _mm_cmpnle_ps(__m128 __a, __m128 __b)
888 return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);
904 /// \param __a
913 _mm_cmpngt_ss(__m128 __a, __m128 __b)
915 return (__m128)__builtin_shufflevector((__v4sf)__a,
916 (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),
932 /// \param __a
938 _mm_cmpngt_ps(__m128 __a, __m128 __b)
940 return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);
956 /// \param __a
965 _mm_cmpnge_ss(__m128 __a, __m128 __b)
967 return (__m128)__builtin_shufflevector((__v4sf)__a,
968 (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),
984 /// \param __a
990 _mm_cmpnge_ps(__m128 __a, __m128 __b)
992 return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);
1008 /// \param __a
1017 _mm_cmpord_ss(__m128 __a, __m128 __b)
1019 return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);
1035 /// \param __a
1041 _mm_cmpord_ps(__m128 __a, __m128 __b)
1043 return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);
1059 /// \param __a
1068 _mm_cmpunord_ss(__m128 __a, __m128 __b)
1070 return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);
1086 /// \param __a
1092 _mm_cmpunord_ps(__m128 __a, __m128 __b)
1094 return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);
1108 /// \param __a
1116 _mm_comieq_ss(__m128 __a, __m128 __b)
1118 return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);
1133 /// \param __a
1141 _mm_comilt_ss(__m128 __a, __m128 __b)
1143 return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);
1157 /// \param __a
1165 _mm_comile_ss(__m128 __a, __m128 __b)
1167 return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);
1181 /// \param __a
1189 _mm_comigt_ss(__m128 __a, __m128 __b)
1191 return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);
1205 /// \param __a
1213 _mm_comige_ss(__m128 __a, __m128 __b)
1215 return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);
1229 /// \param __a
1237 _mm_comineq_ss(__m128 __a, __m128 __b)
1239 return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);
1252 /// \param __a
1260 _mm_ucomieq_ss(__m128 __a, __m128 __b)
1262 return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);
1276 /// \param __a
1284 _mm_ucomilt_ss(__m128 __a, __m128 __b)
1286 return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);
1300 /// \param __a
1308 _mm_ucomile_ss(__m128 __a, __m128 __b)
1310 return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);
1324 /// \param __a
1332 _mm_ucomigt_ss(__m128 __a, __m128 __b)
1334 return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);
1348 /// \param __a
1356 _mm_ucomige_ss(__m128 __a, __m128 __b)
1358 return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);
1371 /// \param __a
1379 _mm_ucomineq_ss(__m128 __a, __m128 __b)
1381 return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);
1396 /// \param __a
1401 _mm_cvtss_si32(__m128 __a)
1403 return __builtin_ia32_cvtss2si((__v4sf)__a);
1418 /// \param __a
1423 _mm_cvt_ss2si(__m128 __a)
1425 return _mm_cvtss_si32(__a);
1442 /// \param __a
1447 _mm_cvtss_si64(__m128 __a)
1449 return __builtin_ia32_cvtss2si64((__v4sf)__a);
1465 /// \param __a
1469 _mm_cvtps_pi32(__m128 __a)
1471 return __trunc64(__builtin_ia32_cvtps2dq((__v4sf)__zeroupper64(__a)));
1485 /// \param __a
1489 _mm_cvt_ps2pi(__m128 __a)
1491 return _mm_cvtps_pi32(__a);
1506 /// \param __a
1511 _mm_cvttss_si32(__m128 __a)
1513 return __builtin_ia32_cvttss2si((__v4sf)__a);
1528 /// \param __a
1533 _mm_cvtt_ss2si(__m128 __a)
1535 return _mm_cvttss_si32(__a);
1551 /// \param __a
1556 _mm_cvttss_si64(__m128 __a)
1558 return __builtin_ia32_cvttss2si64((__v4sf)__a);
1575 /// \param __a
1579 _mm_cvttps_pi32(__m128 __a)
1581 return __trunc64(__builtin_ia32_cvttps2dq((__v4sf)__zeroupper64(__a)));
1596 /// \param __a
1600 _mm_cvtt_ps2pi(__m128 __a)
1602 return _mm_cvttps_pi32(__a);
1614 /// \param __a
1621 static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsi32_ss(__m128 __a,
1623 __a[0] = __b;
1624 return __a;
1636 /// \param __a
1643 static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvt_si2ss(__m128 __a,
1645 return _mm_cvtsi32_ss(__a, __b);
1659 /// \param __a
1667 _mm_cvtsi64_ss(__m128 __a, long long __b) {
1668 __a[0] = __b;
1669 return __a;
1683 /// \param __a
1692 _mm_cvtpi32_ps(__m128 __a, __m64 __b)
1695 (__v4sf)__a,
1709 /// \param __a
1718 _mm_cvt_pi2ps(__m128 __a, __m64 __b)
1720 return _mm_cvtpi32_ps(__a, __b);
1730 /// \param __a
1735 _mm_cvtss_f32(__m128 __a) {
1736 return __a[0];
1747 /// \param __a
1755 _mm_loadh_pi(__m128 __a, const __m64 *__p)
1763 return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
1774 /// \param __a
1782 _mm_loadl_pi(__m128 __a, const __m64 *__p)
1790 return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
1895 __m128 __a = _mm_load_ps(__p);
1896 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);
2042 /// \param __a
2045 _mm_storeh_pi(__m64 *__p, __m128 __a)
2051 ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3);
2063 /// \param __a
2066 _mm_storel_pi(__m64 *__p, __m128 __a)
2072 ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1);
2084 /// \param __a
2087 _mm_store_ss(float *__p, __m128 __a)
2092 ((struct __mm_store_ss_struct*)__p)->__u = __a[0];
2105 /// \param __a
2108 _mm_storeu_ps(float *__p, __m128 __a)
2113 ((struct __storeu_ps*)__p)->__v = __a;
2126 /// \param __a
2129 _mm_store_ps(float *__p, __m128 __a)
2131 *(__m128*)__p = __a;
2144 /// \param __a
2148 _mm_store1_ps(float *__p, __m128 __a)
2150 __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
2151 _mm_store_ps(__p, __a);
2164 /// \param __a
2168 _mm_store_ps1(float *__p, __m128 __a)
2170 _mm_store1_ps(__p, __a);
2184 /// \param __a
2187 _mm_storer_ps(float *__p, __m128 __a)
2189 __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);
2190 _mm_store_ps(__p, __a);
2242 /// \param __a
2245 _mm_stream_pi(void *__p, __m64 __a)
2247 __builtin_nontemporal_store(__a, (__m64 *)__p);
2261 /// \param __a
2264 _mm_stream_ps(void *__p, __m128 __a)
2266 __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
2350 /// \param __a
2356 _mm_max_pi16(__m64 __a, __m64 __b)
2358 return (__m64)__builtin_elementwise_max((__v4hi)__a, (__v4hi)__b);
2369 /// \param __a
2375 _mm_max_pu8(__m64 __a, __m64 __b)
2377 return (__m64)__builtin_elementwise_max((__v8qu)__a, (__v8qu)__b);
2388 /// \param __a
2394 _mm_min_pi16(__m64 __a, __m64 __b)
2396 return (__m64)__builtin_elementwise_min((__v4hi)__a, (__v4hi)__b);
2407 /// \param __a
2413 _mm_min_pu8(__m64 __a, __m64 __b)
2415 return (__m64)__builtin_elementwise_min((__v8qu)__a, (__v8qu)__b);
2426 /// \param __a
2428 /// \returns The most significant bit from each 8-bit element in \a __a,
2431 _mm_movemask_pi8(__m64 __a)
2433 return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a));
2444 /// \param __a
2450 _mm_mulhi_pu16(__m64 __a, __m64 __b)
2452 return __trunc64(__builtin_ia32_pmulhuw128((__v8hi)__anyext128(__a),
2547 /// \param __a
2553 _mm_avg_pu8(__m64 __a, __m64 __b)
2555 return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a),
2567 /// \param __a
2573 _mm_avg_pu16(__m64 __a, __m64 __b)
2575 return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a),
2588 /// \param __a
2596 _mm_sad_pu8(__m64 __a, __m64 __b)
2598 return __trunc64(__builtin_ia32_psadbw128((__v16qi)__zext128(__a),
2766 /// \param __a
2776 _mm_unpackhi_ps(__m128 __a, __m128 __b) {
2777 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);
2787 /// \param __a
2797 _mm_unpacklo_ps(__m128 __a, __m128 __b) {
2798 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);
2810 /// \param __a
2818 _mm_move_ss(__m128 __a, __m128 __b) {
2819 __a[0] = __b[0];
2820 return __a;
2831 /// \param __a
2839 _mm_movehl_ps(__m128 __a, __m128 __b) {
2840 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);
2851 /// \param __a
2859 _mm_movelh_ps(__m128 __a, __m128 __b) {
2860 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);
2870 /// \param __a
2876 _mm_cvtpi16_ps(__m64 __a)
2878 return __builtin_convertvector((__v4hi)__a, __v4sf);
2888 /// \param __a
2894 _mm_cvtpu16_ps(__m64 __a)
2896 return __builtin_convertvector((__v4hu)__a, __v4sf);
2906 /// \param __a
2912 _mm_cvtpi8_ps(__m64 __a)
2915 __builtin_shufflevector((__v8qs)__a, __extension__ (__v8qs){},
2926 /// \param __a
2933 _mm_cvtpu8_ps(__m64 __a)
2936 __builtin_shufflevector((__v8qu)__a, __extension__ (__v8qu){},
2947 /// \param __a
2957 _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
2960 __builtin_shufflevector((__v2si)__a, (__v2si)__b,
2977 /// \param __a
2982 _mm_cvtps_pi16(__m128 __a)
2985 (__v4si)__builtin_ia32_cvtps2dq((__v4sf)__a), (__v4si)_mm_setzero_ps()));
3002 /// \param __a
3007 _mm_cvtps_pi8(__m128 __a)
3011 __b = _mm_cvtps_pi16(__a);
3026 /// \param __a
3032 _mm_movemask_ps(__m128 __a)
3034 return __builtin_ia32_movmskps((__v4sf)__a);