Lines matching defs:__a (Clang SSE intrinsics header, xmmintrin.h)

48 /// \param __a
58 _mm_add_ss(__m128 __a, __m128 __b)
60 __a[0] += __b[0];
61 return __a;
71 /// \param __a
78 _mm_add_ps(__m128 __a, __m128 __b)
80 return (__m128)((__v4sf)__a + (__v4sf)__b);
90 /// \param __a
100 _mm_sub_ss(__m128 __a, __m128 __b)
102 __a[0] -= __b[0];
103 return __a;
114 /// \param __a
121 _mm_sub_ps(__m128 __a, __m128 __b)
123 return (__m128)((__v4sf)__a - (__v4sf)__b);
133 /// \param __a
143 _mm_mul_ss(__m128 __a, __m128 __b)
145 __a[0] *= __b[0];
146 return __a;
156 /// \param __a
163 _mm_mul_ps(__m128 __a, __m128 __b)
165 return (__m128)((__v4sf)__a * (__v4sf)__b);
175 /// \param __a
185 _mm_div_ss(__m128 __a, __m128 __b)
187 __a[0] /= __b[0];
188 return __a;
197 /// \param __a
204 _mm_div_ps(__m128 __a, __m128 __b)
206 return (__m128)((__v4sf)__a / (__v4sf)__b);
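
The _ss arithmetic forms above operate only on the lowest lane and copy the upper three lanes of __a through unchanged, while the _ps forms operate on all four lanes. A minimal usage sketch follows; the function and variable names are illustrative, not from the header.

#include <xmmintrin.h>

/* Illustrative only: lane 0 differs between the _ss and _ps forms. */
static void add_demo(void) {
  __m128 lhs = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 rhs = _mm_setr_ps(10.0f, 20.0f, 30.0f, 40.0f);
  __m128 sum_all = _mm_add_ps(lhs, rhs); /* {11, 22, 33, 44} */
  __m128 sum_low = _mm_add_ss(lhs, rhs); /* {11, 2, 3, 4}: upper lanes come from lhs */
  (void)sum_all; (void)sum_low;
}
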
216 /// \param __a
222 _mm_sqrt_ss(__m128 __a)
224 return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);
234 /// \param __a
239 _mm_sqrt_ps(__m128 __a)
241 return __builtin_ia32_sqrtps((__v4sf)__a);
251 /// \param __a
257 _mm_rcp_ss(__m128 __a)
259 return (__m128)__builtin_ia32_rcpss((__v4sf)__a);
269 /// \param __a
274 _mm_rcp_ps(__m128 __a)
276 return (__m128)__builtin_ia32_rcpps((__v4sf)__a);
286 /// \param __a
293 _mm_rsqrt_ss(__m128 __a)
295 return __builtin_ia32_rsqrtss((__v4sf)__a);
305 /// \param __a
310 _mm_rsqrt_ps(__m128 __a)
312 return __builtin_ia32_rsqrtps((__v4sf)__a);
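
_mm_sqrt_ss/_mm_sqrt_ps compute full-precision square roots, while the _mm_rcp_* and _mm_rsqrt_* forms return fast approximations (on the order of 12 bits of precision), so callers that need more accuracy usually refine them. A sketch of one Newton-Raphson step for the reciprocal, under that assumption; the helper name is made up for illustration.

#include <xmmintrin.h>

/* Illustrative helper: one Newton-Raphson step r' = r * (2 - x * r)
   improves the ~12-bit estimate from _mm_rcp_ps to roughly 22-23 bits. */
static __m128 refined_rcp(__m128 x) {
  __m128 r = _mm_rcp_ps(x);
  return _mm_mul_ps(r, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(x, r)));
}
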
325 /// \param __a
335 _mm_min_ss(__m128 __a, __m128 __b)
337 return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);
349 /// \param __a
356 _mm_min_ps(__m128 __a, __m128 __b)
358 return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);
371 /// \param __a
381 _mm_max_ss(__m128 __a, __m128 __b)
383 return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);
395 /// \param __a
402 _mm_max_ps(__m128 __a, __m128 __b)
404 return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);
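
_mm_min_ps and _mm_max_ps pick the smaller or larger value per lane, which makes clamping a vector into a range a two-intrinsic operation. A small sketch with an illustrative helper name; note the SSE rule that when either operand is NaN these intrinsics return the second operand.

#include <xmmintrin.h>

/* Illustrative: clamp every lane of v into [lo, hi]. */
static __m128 clamp_ps(__m128 v, __m128 lo, __m128 hi) {
  return _mm_min_ps(_mm_max_ps(v, lo), hi);
}
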
413 /// \param __a
420 _mm_and_ps(__m128 __a, __m128 __b)
422 return (__m128)((__v4su)__a & (__v4su)__b);
433 /// \param __a
442 _mm_andnot_ps(__m128 __a, __m128 __b)
444 return (__m128)(~(__v4su)__a & (__v4su)__b);
453 /// \param __a
460 _mm_or_ps(__m128 __a, __m128 __b)
462 return (__m128)((__v4su)__a | (__v4su)__b);
472 /// \param __a
479 _mm_xor_ps(__m128 __a, __m128 __b)
481 return (__m128)((__v4su)__a ^ (__v4su)__b);
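
Because the bitwise forms simply reinterpret the floats as raw bits, sign-bit tricks fall out directly: -0.0f has only the sign bit set, so it doubles as a mask. Sketch with illustrative helper names.

#include <xmmintrin.h>

/* Illustrative helpers: |v| and -v for all four lanes. */
static __m128 abs_ps(__m128 v) {
  return _mm_andnot_ps(_mm_set1_ps(-0.0f), v); /* (~sign_mask) & v clears the sign bit */
}
static __m128 negate_ps(__m128 v) {
  return _mm_xor_ps(v, _mm_set1_ps(-0.0f));    /* flip the sign bit in every lane */
}
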
495 /// \param __a
504 _mm_cmpeq_ss(__m128 __a, __m128 __b)
506 return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);
519 /// \param __a
525 _mm_cmpeq_ps(__m128 __a, __m128 __b)
527 return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);
542 /// \param __a
551 _mm_cmplt_ss(__m128 __a, __m128 __b)
553 return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);
567 /// \param __a
573 _mm_cmplt_ps(__m128 __a, __m128 __b)
575 return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);
590 /// \param __a
599 _mm_cmple_ss(__m128 __a, __m128 __b)
601 return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);
615 /// \param __a
621 _mm_cmple_ps(__m128 __a, __m128 __b)
623 return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);
638 /// \param __a
647 _mm_cmpgt_ss(__m128 __a, __m128 __b)
649 return (__m128)__builtin_shufflevector((__v4sf)__a,
650 (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),
665 /// \param __a
671 _mm_cmpgt_ps(__m128 __a, __m128 __b)
673 return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);
688 /// \param __a
697 _mm_cmpge_ss(__m128 __a, __m128 __b)
699 return (__m128)__builtin_shufflevector((__v4sf)__a,
700 (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),
715 /// \param __a
721 _mm_cmpge_ps(__m128 __a, __m128 __b)
723 return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);
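
As the bodies above show, SSE has no greater-than instruction, so cmpgt/cmpge are synthesized by swapping the operands of cmplt/cmple (with a shufflevector in the _ss forms to keep the upper lanes of __a). The compares produce per-lane masks of all ones or all zeros, whose usual job is a branch-free select, sketched here with illustrative helper names.

#include <xmmintrin.h>

/* Illustrative: per-lane select, taking a where mask is all-ones, else b.
   mask is expected to come from one of the _mm_cmp*_ps intrinsics. */
static __m128 select_ps(__m128 mask, __m128 a, __m128 b) {
  return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b));
}

/* Per-lane maximum via compare + select, only to show how the masks compose
   (in practice _mm_max_ps does this in one instruction). */
static __m128 max_via_mask(__m128 a, __m128 b) {
  return select_ps(_mm_cmpgt_ps(a, b), a, b);
}
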
738 /// \param __a
747 _mm_cmpneq_ss(__m128 __a, __m128 __b)
749 return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);
763 /// \param __a
769 _mm_cmpneq_ps(__m128 __a, __m128 __b)
771 return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);
787 /// \param __a
796 _mm_cmpnlt_ss(__m128 __a, __m128 __b)
798 return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);
813 /// \param __a
819 _mm_cmpnlt_ps(__m128 __a, __m128 __b)
821 return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);
837 /// \param __a
846 _mm_cmpnle_ss(__m128 __a, __m128 __b)
848 return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);
863 /// \param __a
869 _mm_cmpnle_ps(__m128 __a, __m128 __b)
871 return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);
887 /// \param __a
896 _mm_cmpngt_ss(__m128 __a, __m128 __b)
898 return (__m128)__builtin_shufflevector((__v4sf)__a,
899 (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),
915 /// \param __a
921 _mm_cmpngt_ps(__m128 __a, __m128 __b)
923 return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);
939 /// \param __a
948 _mm_cmpnge_ss(__m128 __a, __m128 __b)
950 return (__m128)__builtin_shufflevector((__v4sf)__a,
951 (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),
967 /// \param __a
973 _mm_cmpnge_ps(__m128 __a, __m128 __b)
975 return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);
991 /// \param __a
1000 _mm_cmpord_ss(__m128 __a, __m128 __b)
1002 return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);
1018 /// \param __a
1024 _mm_cmpord_ps(__m128 __a, __m128 __b)
1026 return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);
1042 /// \param __a
1051 _mm_cmpunord_ss(__m128 __a, __m128 __b)
1053 return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);
1069 /// \param __a
1075 _mm_cmpunord_ps(__m128 __a, __m128 __b)
1077 return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);
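
cmpord reports true only when neither input is a NaN and cmpunord is its complement, so comparing a vector with itself gives a vectorized NaN test. A one-line sketch with an illustrative helper name.

#include <xmmintrin.h>

/* Illustrative: all-ones in each lane where v is NaN (a NaN is never
   ordered with itself), all-zeros elsewhere. */
static __m128 isnan_mask_ps(__m128 v) {
  return _mm_cmpunord_ps(v, v);
}
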
1091 /// \param __a
1099 _mm_comieq_ss(__m128 __a, __m128 __b)
1101 return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);
1116 /// \param __a
1124 _mm_comilt_ss(__m128 __a, __m128 __b)
1126 return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);
1140 /// \param __a
1148 _mm_comile_ss(__m128 __a, __m128 __b)
1150 return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);
1164 /// \param __a
1172 _mm_comigt_ss(__m128 __a, __m128 __b)
1174 return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);
1188 /// \param __a
1196 _mm_comige_ss(__m128 __a, __m128 __b)
1198 return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);
1212 /// \param __a
1220 _mm_comineq_ss(__m128 __a, __m128 __b)
1222 return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);
1235 /// \param __a
1243 _mm_ucomieq_ss(__m128 __a, __m128 __b)
1245 return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);
1259 /// \param __a
1267 _mm_ucomilt_ss(__m128 __a, __m128 __b)
1269 return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);
1283 /// \param __a
1291 _mm_ucomile_ss(__m128 __a, __m128 __b)
1293 return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);
1307 /// \param __a
1315 _mm_ucomigt_ss(__m128 __a, __m128 __b)
1317 return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);
1331 /// \param __a
1339 _mm_ucomige_ss(__m128 __a, __m128 __b)
1341 return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);
1354 /// \param __a
1362 _mm_ucomineq_ss(__m128 __a, __m128 __b)
1364 return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);
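
Unlike the cmp* family, the comi/ucomi intrinsics compare only the low lanes and return an int (0 or 1), so they are the natural way to drive ordinary control flow; the ucomi variants differ in raising the invalid-operation exception only for signaling NaNs. A small sketch, helper name illustrative.

#include <xmmintrin.h>

/* Illustrative: scalar control flow driven by the low lanes of two vectors. */
static float smaller_low_lane(__m128 a, __m128 b) {
  if (_mm_comilt_ss(a, b))   /* 1 when a[0] < b[0] */
    return _mm_cvtss_f32(a);
  return _mm_cvtss_f32(b);
}
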
1379 /// \param __a
1384 _mm_cvtss_si32(__m128 __a)
1386 return __builtin_ia32_cvtss2si((__v4sf)__a);
1401 /// \param __a
1406 _mm_cvt_ss2si(__m128 __a)
1408 return _mm_cvtss_si32(__a);
1425 /// \param __a
1430 _mm_cvtss_si64(__m128 __a)
1432 return __builtin_ia32_cvtss2si64((__v4sf)__a);
1448 /// \param __a
1452 _mm_cvtps_pi32(__m128 __a)
1454 return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);
1468 /// \param __a
1472 _mm_cvt_ps2pi(__m128 __a)
1474 return _mm_cvtps_pi32(__a);
1489 /// \param __a
1494 _mm_cvttss_si32(__m128 __a)
1496 return __builtin_ia32_cvttss2si((__v4sf)__a);
1511 /// \param __a
1516 _mm_cvtt_ss2si(__m128 __a)
1518 return _mm_cvttss_si32(__a);
1534 /// \param __a
1539 _mm_cvttss_si64(__m128 __a)
1541 return __builtin_ia32_cvttss2si64((__v4sf)__a);
1558 /// \param __a
1562 _mm_cvttps_pi32(__m128 __a)
1564 return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);
1579 /// \param __a
1583 _mm_cvtt_ps2pi(__m128 __a)
1585 return _mm_cvttps_pi32(__a);
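
_mm_cvtss_si32 and _mm_cvtps_pi32 round according to the current rounding mode (round-to-nearest-even by default), while the cvtt variants always truncate toward zero. A sketch of the difference; names and values are illustrative.

#include <xmmintrin.h>

static void rounding_demo(void) {
  __m128 v = _mm_set_ss(2.7f);           /* low lane = 2.7, upper lanes = 0 */
  int rounded   = _mm_cvtss_si32(v);     /* 3 under the default rounding mode */
  int truncated = _mm_cvttss_si32(v);    /* 2: truncation toward zero */
  (void)rounded; (void)truncated;
}
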
1597 /// \param __a
1605 _mm_cvtsi32_ss(__m128 __a, int __b)
1607 __a[0] = __b;
1608 return __a;
1620 /// \param __a
1628 _mm_cvt_si2ss(__m128 __a, int __b)
1630 return _mm_cvtsi32_ss(__a, __b);
1644 /// \param __a
1652 _mm_cvtsi64_ss(__m128 __a, long long __b)
1654 __a[0] = __b;
1655 return __a;
1669 /// \param __a
1678 _mm_cvtpi32_ps(__m128 __a, __m64 __b)
1680 return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);
1692 /// \param __a
1701 _mm_cvt_pi2ps(__m128 __a, __m64 __b)
1703 return _mm_cvtpi32_ps(__a, __b);
1713 /// \param __a
1718 _mm_cvtss_f32(__m128 __a)
1720 return __a[0];
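
The reverse direction replaces only the low lane, leaving the upper three lanes of __a untouched, and _mm_cvtss_f32 extracts the low lane as a plain float. Sketch with an illustrative helper name.

#include <xmmintrin.h>

/* Illustrative: int -> float low-lane round trip. */
static float as_float(int x) {
  __m128 v = _mm_cvtsi32_ss(_mm_setzero_ps(), x); /* {(float)x, 0, 0, 0} */
  return _mm_cvtss_f32(v);                        /* pull lane 0 back out */
}
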
1731 /// \param __a
1739 _mm_loadh_pi(__m128 __a, const __m64 *__p)
1747 return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
1758 /// \param __a
1766 _mm_loadl_pi(__m128 __a, const __m64 *__p)
1774 return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
1879 __m128 __a = _mm_load_ps(__p);
1880 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);
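
_mm_loadh_pi and _mm_loadl_pi patch two floats from memory into the high or low half of an existing vector (no alignment required), and the _mm_loadr_ps body above is just an aligned load followed by a lane reversal. A sketch that assembles a full vector from two 64-bit halves; pointer and helper names are illustrative.

#include <xmmintrin.h>

/* Illustrative: build {lo[0], lo[1], hi[0], hi[1]} from two float pairs. */
static __m128 load_two_halves(const float *lo, const float *hi) {
  __m128 v = _mm_setzero_ps();
  v = _mm_loadl_pi(v, (const __m64 *)lo); /* fill lanes 0..1 */
  v = _mm_loadh_pi(v, (const __m64 *)hi); /* fill lanes 2..3 */
  return v;
}
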
2032 /// \param __a
2035 _mm_storeh_pi(__m64 *__p, __m128 __a)
2041 ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3);
2053 /// \param __a
2056 _mm_storel_pi(__m64 *__p, __m128 __a)
2062 ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1);
2074 /// \param __a
2077 _mm_store_ss(float *__p, __m128 __a)
2082 ((struct __mm_store_ss_struct*)__p)->__u = __a[0];
2095 /// \param __a
2098 _mm_storeu_ps(float *__p, __m128 __a)
2103 ((struct __storeu_ps*)__p)->__v = __a;
2116 /// \param __a
2119 _mm_store_ps(float *__p, __m128 __a)
2121 *(__m128*)__p = __a;
2134 /// \param __a
2138 _mm_store1_ps(float *__p, __m128 __a)
2140 __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
2141 _mm_store_ps(__p, __a);
2154 /// \param __a
2158 _mm_store_ps1(float *__p, __m128 __a)
2160 _mm_store1_ps(__p, __a);
2174 /// \param __a
2177 _mm_storer_ps(float *__p, __m128 __a)
2179 __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);
2180 _mm_store_ps(__p, __a);
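
The store family mirrors the loads: _mm_store_ss writes only the low lane, _mm_storeu_ps tolerates unaligned addresses, _mm_store_ps requires 16-byte alignment, and _mm_store1_ps/_mm_storer_ps shuffle the register (broadcast or reverse, as the bodies above show) before an aligned store. Sketch, parameter names illustrative.

#include <xmmintrin.h>

static void store_demo(float *out, float *aligned16_out, __m128 v) {
  _mm_store_ss(out, v);             /* writes v[0] only */
  _mm_storeu_ps(out, v);            /* 4 floats, no alignment requirement */
  _mm_store_ps(aligned16_out, v);   /* 4 floats, pointer must be 16-byte aligned */
}
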
2232 /// \param __a
2235 _mm_stream_pi(void *__p, __m64 __a)
2237 __builtin_ia32_movntq((__m64 *)__p, __a);
2251 /// \param __a
2254 _mm_stream_ps(void *__p, __m128 __a)
2256 __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
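
The stream variants store with a non-temporal hint so the data bypasses the caches; they suit large write-once buffers and are normally followed by _mm_sfence before anything reads the memory back. Sketch under those assumptions; names are illustrative.

#include <xmmintrin.h>

/* Illustrative: fill a 16-byte-aligned buffer without polluting the cache.
   count must be a multiple of 4 for this simplified loop. */
static void fill_streaming(float *aligned_dst, __m128 value, unsigned count) {
  for (unsigned i = 0; i < count; i += 4)
    _mm_stream_ps(aligned_dst + i, value);
  _mm_sfence(); /* order the non-temporal stores before later loads */
}
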
2340 /// \param __a
2346 _mm_max_pi16(__m64 __a, __m64 __b)
2348 return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
2359 /// \param __a
2365 _mm_max_pu8(__m64 __a, __m64 __b)
2367 return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
2378 /// \param __a
2384 _mm_min_pi16(__m64 __a, __m64 __b)
2386 return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
2397 /// \param __a
2403 _mm_min_pu8(__m64 __a, __m64 __b)
2405 return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
2416 /// \param __a
2418 /// \returns The most significant bit from each 8-bit element in \a __a,
2421 _mm_movemask_pi8(__m64 __a)
2423 return __builtin_ia32_pmovmskb((__v8qi)__a);
2434 /// \param __a
2440 _mm_mulhi_pu16(__m64 __a, __m64 __b)
2442 return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
2519 /// \param __a
2525 _mm_avg_pu8(__m64 __a, __m64 __b)
2527 return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
2538 /// \param __a
2544 _mm_avg_pu16(__m64 __a, __m64 __b)
2546 return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
2558 /// \param __a
2566 _mm_sad_pu8(__m64 __a, __m64 __b)
2568 return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
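
These are the SSE1 additions to the MMX integer set (pmax/pmin, pavg, pmulhuw, psadbw, pmovmskb) operating on 64-bit __m64 values; because MMX state aliases the x87 registers, code conventionally calls _mm_empty() when finished. A sketch of the classic _mm_sad_pu8 use, summing absolute differences of two 8-byte blocks; the helper name is illustrative.

#include <xmmintrin.h>

/* Illustrative: sum of absolute differences of eight unsigned bytes.
   psadbw leaves the 16-bit sum in the low word of the result. */
static int sad8(__m64 a, __m64 b) {
  int sum = _mm_cvtsi64_si32(_mm_sad_pu8(a, b));
  _mm_empty();   /* leave MMX state so x87 code can run afterwards */
  return sum;
}
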
2735 /// \param __a
2745 _mm_unpackhi_ps(__m128 __a, __m128 __b)
2747 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);
2757 /// \param __a
2767 _mm_unpacklo_ps(__m128 __a, __m128 __b)
2769 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);
2781 /// \param __a
2789 _mm_move_ss(__m128 __a, __m128 __b)
2791 __a[0] = __b[0];
2792 return __a;
2803 /// \param __a
2811 _mm_movehl_ps(__m128 __a, __m128 __b)
2813 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);
2824 /// \param __a
2832 _mm_movelh_ps(__m128 __a, __m128 __b)
2834 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);
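
This group rearranges lanes across two vectors: unpacklo/unpackhi interleave the low or high halves, movelh/movehl copy 64-bit halves between registers, and move_ss replaces only the low lane. Combined, they transpose a 4x4 matrix, much as the header's _MM_TRANSPOSE4_PS macro does; a sketch with an illustrative function name.

#include <xmmintrin.h>

/* Illustrative: transpose a 4x4 matrix held in four row vectors. */
static void transpose4(__m128 *r0, __m128 *r1, __m128 *r2, __m128 *r3) {
  __m128 t0 = _mm_unpacklo_ps(*r0, *r1); /* {r0[0], r1[0], r0[1], r1[1]} */
  __m128 t1 = _mm_unpacklo_ps(*r2, *r3);
  __m128 t2 = _mm_unpackhi_ps(*r0, *r1); /* {r0[2], r1[2], r0[3], r1[3]} */
  __m128 t3 = _mm_unpackhi_ps(*r2, *r3);
  *r0 = _mm_movelh_ps(t0, t1);           /* column 0 */
  *r1 = _mm_movehl_ps(t1, t0);           /* column 1 */
  *r2 = _mm_movelh_ps(t2, t3);           /* column 2 */
  *r3 = _mm_movehl_ps(t3, t2);           /* column 3 */
}
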
2844 /// \param __a
2850 _mm_cvtpi16_ps(__m64 __a)
2856 __b = _mm_cmpgt_pi16(__b, __a);
2857 __c = _mm_unpackhi_pi16(__a, __b);
2861 __c = _mm_unpacklo_pi16(__a, __b);
2874 /// \param __a
2880 _mm_cvtpu16_ps(__m64 __a)
2886 __c = _mm_unpackhi_pi16(__a, __b);
2890 __c = _mm_unpacklo_pi16(__a, __b);
2903 /// \param __a
2909 _mm_cvtpi8_ps(__m64 __a)
2914 __b = _mm_cmpgt_pi8(__b, __a);
2915 __b = _mm_unpacklo_pi8(__a, __b);
2927 /// \param __a
2934 _mm_cvtpu8_ps(__m64 __a)
2939 __b = _mm_unpacklo_pi8(__a, __b);
2951 /// \param __a
2961 _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
2969 return _mm_cvtpi32_ps(__c, __a);
2985 /// \param __a
2990 _mm_cvtps_pi16(__m128 __a)
2994 __b = _mm_cvtps_pi32(__a);
2995 __a = _mm_movehl_ps(__a, __a);
2996 __c = _mm_cvtps_pi32(__a);
3015 /// \param __a
3020 _mm_cvtps_pi8(__m128 __a)
3024 __b = _mm_cvtps_pi16(__a);
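
The remaining conversions widen packed 8- and 16-bit integers (signed or unsigned) into four floats and narrow floats back down, using the cmpgt/unpack sign-extension trick visible in the fragments above. A round-trip sketch through the signed 16-bit form; names and values are illustrative.

#include <xmmintrin.h>

static void widen_narrow_demo(void) {
  __m64  four_shorts = _mm_setr_pi16(-1, 2, -3, 4);
  __m128 as_floats   = _mm_cvtpi16_ps(four_shorts); /* {-1.0, 2.0, -3.0, 4.0} */
  __m64  back        = _mm_cvtps_pi16(as_floats);   /* packed back down with signed saturation */
  _mm_empty();                                      /* done with MMX state */
  (void)back;
}
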
3039 /// \param __a
3045 _mm_movemask_ps(__m128 __a)
3047 return __builtin_ia32_movmskps((__v4sf)__a);
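
_mm_movemask_ps compresses the sign bit of each lane into the low four bits of an int, which is the usual way a packed compare result becomes an ordinary branch. A closing sketch with an illustrative helper name.

#include <xmmintrin.h>

/* Illustrative: true when every lane of v is strictly positive.
   Each true lane of the compare mask has its sign bit set, so 0xF means all four. */
static int all_positive(__m128 v) {
  return _mm_movemask_ps(_mm_cmpgt_ps(v, _mm_setzero_ps())) == 0xF;
}
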