Lines Matching defs:__A

/// For each element, computes <c> (__A * __B) + __C </c>.
/// \param __A
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
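
A minimal usage sketch (not part of the header): with FMA enabled (e.g. compiling with -mfma), each lane gets one fused multiply-add, so the result below is {10.5, 20.5, 30.5, 40.5}.

#include <immintrin.h>

__m128 fmadd_demo(void) {
  __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 b = _mm_set1_ps(10.0f);
  __m128 c = _mm_set1_ps(0.5f);
  // Per lane: a*b + c -> {1*10+0.5, 2*10+0.5, 3*10+0.5, 4*10+0.5}
  return _mm_fmadd_ps(a, b, c);
}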
/// For each element, computes <c> (__A * __B) + __C </c>.
/// \param __A
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);

/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
/// result[127:32] = __A[127:32]
/// \param __A
/// \returns A 128-bit vector containing the result in the lower
/// 32 bits and a copy of \a __A[127:32] in the upper 96 bits.
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);

/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = __A[127:64]
/// \param __A
/// \returns A 128-bit vector containing the result in the lower
/// 64 bits and a copy of \a __A[127:64] in the upper 64 bits.
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
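
The _ss/_sd forms compute only the low element; as the pseudo-code above says, the remaining bits of the result are copied from \a __A, not from \a __B or \a __C. A small sketch with illustrative values:

#include <immintrin.h>

__m128 fmadd_ss_demo(void) {
  __m128 a = _mm_setr_ps(1.0f, 10.0f, 20.0f, 30.0f);
  __m128 b = _mm_setr_ps(2.0f, -1.0f, -1.0f, -1.0f);
  __m128 c = _mm_setr_ps(3.0f, -1.0f, -1.0f, -1.0f);
  // Low lane: 1*2+3 = 5. Upper lanes come from a: result is {5, 10, 20, 30}.
  return _mm_fmadd_ss(a, b, c);
}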
/// For each element, computes <c> (__A * __B) - __C </c>.
/// \param __A
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);

/// For each element, computes <c> (__A * __B) - __C </c>.
/// \param __A
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);

/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
/// result[127:32] = __A[127:32]
/// \param __A
/// \returns A 128-bit vector containing the result in the lower
/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);

/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = __A[127:64]
/// \param __A
/// \returns A 128-bit vector containing the result in the lower
/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
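
Because the multiply and subtract are fused, there is a single rounding at the end. A sketch of the difference that makes (the constant is chosen so x*x is not exactly representable in float; this assumes the compiler does not itself contract the plain expression into an FMA, e.g. -ffp-contract=off):

#include <immintrin.h>
#include <stdio.h>

void fused_vs_unfused(void) {
  float x = 1.0f + 0x1p-12f;  // x*x = 1 + 2^-11 + 2^-24, 25 significant bits
  __m128 vx = _mm_set_ss(x);
  // Fused: x*x - 1 is computed exactly, then rounded once: 2^-11 + 2^-24.
  float fused = _mm_cvtss_f32(_mm_fmsub_ss(vx, vx, _mm_set_ss(1.0f)));
  // Unfused: x*x rounds first (losing the 2^-24 term), then subtracts: 2^-11.
  float unfused = x * x - 1.0f;
  printf("fused=%a unfused=%a\n", fused, unfused);  // differ by 2^-24
}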
/// For each element, computes <c> -(__A * __B) + __C </c>.
/// \param __A
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);

/// For each element, computes <c> -(__A * __B) + __C </c>.
/// \param __A
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);

/// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0]
/// result[127:32] = __A[127:32]
/// \param __A
/// \returns A 128-bit vector containing the result in the lower
/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
  // __B is negated instead of __A so the pass-through upper bits of __A
  // reach the result unmodified; the product's sign is the same either way.
  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);

/// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = __A[127:64]
/// \param __A
/// \returns A 128-bit vector containing the result in the lower
/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
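
fnmadd computes -(a*b) + c, i.e. c - a*b, the residual shape that appears in Newton-Raphson refinement. A sketch refining the SSE reciprocal estimate by one step, x' = x * (2 - a*x) (function name illustrative):

#include <immintrin.h>

__m128 recip_refined(__m128 a) {
  __m128 x = _mm_rcp_ps(a);                           // ~12-bit estimate of 1/a
  __m128 t = _mm_fnmadd_ps(a, x, _mm_set1_ps(2.0f));  // 2 - a*x, single rounding
  return _mm_mul_ps(x, t);  // roughly doubles the number of accurate bits
}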
/// For each element, computes <c> -(__A * __B) - __C </c>.
/// \param __A
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);

/// For each element, computes <c> -(__A * __B) - __C </c>.
/// \param __A
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);

/// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0]
/// result[127:32] = __A[127:32]
/// \param __A
/// \returns A 128-bit vector containing the result in the lower
/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
  // As in _mm_fnmadd_ss, __B is negated rather than __A so the upper bits
  // of __A pass through unchanged.
  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);

/// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = __A[127:64]
/// \param __A
/// \returns A 128-bit vector containing the result in the lower
/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
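
fnmsub negates the whole sum: -(a*b) - c = -((a*b) + c). Since round-to-nearest is symmetric under negation, the result should match a sign-flipped fmadd bit for bit; a sketch of that identity (assuming the default rounding mode):

#include <immintrin.h>

__m128 fnmsub_identity(__m128 a, __m128 b, __m128 c) {
  __m128 r1 = _mm_fnmsub_ps(a, b, c);
  // XOR with -0.0f flips only the sign bit, an exact negation
  // (unlike subtracting from zero, which mishandles signed zeros).
  __m128 r2 = _mm_xor_ps(_mm_fmadd_ps(a, b, c), _mm_set1_ps(-0.0f));
  (void)r2;  // r1 and r2 should be bitwise equal
  return r1;
}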
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
/// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96]
/// \param __A
_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);

/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
/// \param __A
_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
/// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96]
/// \param __A
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);

/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
/// \param __A
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
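
The subtract-in-even / add-in-odd pattern of fmaddsub is exactly the sign pattern of a complex multiply in {real, imag} layout: re = ac - bd, im = ad + bc. A sketch for one complex double per vector (the shuffles assume SSE3 for _mm_movedup_pd; the function name is illustrative):

#include <immintrin.h>

// z1 = {a, b} represents a+bi; z2 = {c, d} represents c+di.
__m128d complex_mul(__m128d z1, __m128d z2) {
  __m128d cc = _mm_movedup_pd(z2);           // {c, c}
  __m128d dd = _mm_unpackhi_pd(z2, z2);      // {d, d}
  __m128d sw = _mm_shuffle_pd(z1, z1, 0x1);  // {b, a}
  // Even lane: a*c - b*d; odd lane: b*c + a*d.
  return _mm_fmaddsub_pd(z1, cc, _mm_mul_pd(sw, dd));
}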
/// For each element, computes <c> (__A * __B) + __C </c>.
/// \param __A
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);

/// For each element, computes <c> (__A * __B) + __C </c>.
/// \param __A
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);

/// For each element, computes <c> (__A * __B) - __C </c>.
/// \param __A
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);

/// For each element, computes <c> (__A * __B) - __C </c>.
/// \param __A
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);

/// For each element, computes <c> -(__A * __B) + __C </c>.
/// \param __A
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);

/// For each element, computes <c> -(__A * __B) + __C </c>.
/// \param __A
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);

/// For each element, computes <c> -(__A * __B) - __C </c>.
/// \param __A
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);

/// For each element, computes <c> -(__A * __B) - __C </c>.
/// \param __A
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
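
The 256-bit forms are the same four operations over eight floats or four doubles. A typical use is a fused axpy-style update; a sketch (handles only lengths that are multiples of 8; name illustrative):

#include <immintrin.h>
#include <stddef.h>

// y[i] = a*x[i] + y[i], one fused multiply-add per lane.
void saxpy_avx2(float a, const float *x, float *y, size_t n) {
  __m256 va = _mm256_set1_ps(a);
  for (size_t i = 0; i < n; i += 8) {
    __m256 vx = _mm256_loadu_ps(x + i);
    __m256 vy = _mm256_loadu_ps(y + i);
    _mm256_storeu_ps(y + i, _mm256_fmadd_ps(va, vx, vy));
  }
}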
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
/// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96]
/// result[159:128] = (__A[159:128] * __B[159:128]) - __C[159:128]
/// result[191:160] = (__A[191:160] * __B[191:160]) + __C[191:160]
/// result[223:192] = (__A[223:192] * __B[223:192]) - __C[223:192]
/// result[255:224] = (__A[255:224] * __B[255:224]) + __C[255:224]
/// \param __A
_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);

/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
/// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128]
/// result[255:192] = (__A[255:192] * __B[255:192]) + __C[255:192]
/// \param __A
_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);

/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
/// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96]
/// result[159:128] = (__A[159:128] * __B[159:128]) + __C[159:128]
/// result[191:160] = (__A[191:160] * __B[191:160]) - __C[191:160]
/// result[223:192] = (__A[223:192] * __B[223:192]) + __C[223:192]
/// result[255:224] = (__A[255:224] * __B[255:224]) - __C[255:224]
/// \param __A
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);

/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
/// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128]
/// result[255:192] = (__A[255:192] * __B[255:192]) - __C[255:192]
/// \param __A
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
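
fmsubadd flips the pattern (add in even lanes, subtract in odd), which matches multiplication by a complex conjugate: re = ac + bd, im = bc - ad. A sketch processing two interleaved complex doubles per __m256d, layout {re0, im0, re1, im1} (name illustrative):

#include <immintrin.h>

// Computes z1[k] * conj(z2[k]) for two complex doubles at once.
__m256d complex_mul_conj(__m256d z1, __m256d z2) {
  __m256d cc = _mm256_movedup_pd(z2);       // {c0, c0, c1, c1}
  __m256d dd = _mm256_unpackhi_pd(z2, z2);  // {d0, d0, d1, d1}
  __m256d sw = _mm256_permute_pd(z1, 0x5);  // {b0, a0, b1, a1}
  // Even lanes: a*c + b*d; odd lanes: b*c - a*d.
  return _mm256_fmsubadd_pd(z1, cc, _mm256_mul_pd(sw, dd));
}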