10b57cec5SDimitry Andric /*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __FMAINTRIN_H 150b57cec5SDimitry Andric #define __FMAINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 180b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) 190b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) 200b57cec5SDimitry Andric 2106c3fb27SDimitry Andric /// Computes a multiply-add of 128-bit vectors of [4 x float]. 2206c3fb27SDimitry Andric /// For each element, computes <c> (__A * __B) + __C </c>. 2306c3fb27SDimitry Andric /// 2406c3fb27SDimitry Andric /// \headerfile <immintrin.h> 2506c3fb27SDimitry Andric /// 2606c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADD213PS instruction. 2706c3fb27SDimitry Andric /// 2806c3fb27SDimitry Andric /// \param __A 2906c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand. 
3006c3fb27SDimitry Andric /// \param __B 3106c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier. 3206c3fb27SDimitry Andric /// \param __C 3306c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the addend. 3406c3fb27SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the result. 350b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 360b57cec5SDimitry Andric _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) 370b57cec5SDimitry Andric { 380b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 390b57cec5SDimitry Andric } 400b57cec5SDimitry Andric 4106c3fb27SDimitry Andric /// Computes a multiply-add of 128-bit vectors of [2 x double]. 4206c3fb27SDimitry Andric /// For each element, computes <c> (__A * __B) + __C </c>. 4306c3fb27SDimitry Andric /// 4406c3fb27SDimitry Andric /// \headerfile <immintrin.h> 4506c3fb27SDimitry Andric /// 4606c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADD213PD instruction. 4706c3fb27SDimitry Andric /// 4806c3fb27SDimitry Andric /// \param __A 4906c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand. 5006c3fb27SDimitry Andric /// \param __B 5106c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier. 5206c3fb27SDimitry Andric /// \param __C 5306c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the addend. 5406c3fb27SDimitry Andric /// \returns A 128-bit [2 x double] vector containing the result. 
550b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 560b57cec5SDimitry Andric _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) 570b57cec5SDimitry Andric { 580b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 590b57cec5SDimitry Andric } 600b57cec5SDimitry Andric 6106c3fb27SDimitry Andric /// Computes a scalar multiply-add of the single-precision values in the 6206c3fb27SDimitry Andric /// low 32 bits of 128-bit vectors of [4 x float]. 63*0fca6ea1SDimitry Andric /// 64*0fca6ea1SDimitry Andric /// \code{.operation} 6506c3fb27SDimitry Andric /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] 6606c3fb27SDimitry Andric /// result[127:32] = __A[127:32] 6706c3fb27SDimitry Andric /// \endcode 6806c3fb27SDimitry Andric /// 6906c3fb27SDimitry Andric /// \headerfile <immintrin.h> 7006c3fb27SDimitry Andric /// 7106c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADD213SS instruction. 7206c3fb27SDimitry Andric /// 7306c3fb27SDimitry Andric /// \param __A 7406c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand in the low 7506c3fb27SDimitry Andric /// 32 bits. 7606c3fb27SDimitry Andric /// \param __B 7706c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier in the low 7806c3fb27SDimitry Andric /// 32 bits. 7906c3fb27SDimitry Andric /// \param __C 8006c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the addend in the low 8106c3fb27SDimitry Andric /// 32 bits. 8206c3fb27SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the result in the low 8306c3fb27SDimitry Andric /// 32 bits and a copy of \a __A[127:32] in the upper 96 bits. 
840b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 850b57cec5SDimitry Andric _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) 860b57cec5SDimitry Andric { 870b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 880b57cec5SDimitry Andric } 890b57cec5SDimitry Andric 9006c3fb27SDimitry Andric /// Computes a scalar multiply-add of the double-precision values in the 9106c3fb27SDimitry Andric /// low 64 bits of 128-bit vectors of [2 x double]. 92*0fca6ea1SDimitry Andric /// 93*0fca6ea1SDimitry Andric /// \code{.operation} 9406c3fb27SDimitry Andric /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] 9506c3fb27SDimitry Andric /// result[127:64] = __A[127:64] 9606c3fb27SDimitry Andric /// \endcode 9706c3fb27SDimitry Andric /// 9806c3fb27SDimitry Andric /// \headerfile <immintrin.h> 9906c3fb27SDimitry Andric /// 10006c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADD213SD instruction. 10106c3fb27SDimitry Andric /// 10206c3fb27SDimitry Andric /// \param __A 10306c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand in the low 10406c3fb27SDimitry Andric /// 64 bits. 10506c3fb27SDimitry Andric /// \param __B 10606c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier in the low 10706c3fb27SDimitry Andric /// 64 bits. 10806c3fb27SDimitry Andric /// \param __C 10906c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the addend in the low 11006c3fb27SDimitry Andric /// 64 bits. 11106c3fb27SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the result in the low 11206c3fb27SDimitry Andric /// 64 bits and a copy of \a __A[127:64] in the upper 64 bits. 
1130b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 1140b57cec5SDimitry Andric _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) 1150b57cec5SDimitry Andric { 1160b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); 1170b57cec5SDimitry Andric } 1180b57cec5SDimitry Andric 11906c3fb27SDimitry Andric /// Computes a multiply-subtract of 128-bit vectors of [4 x float]. 12006c3fb27SDimitry Andric /// For each element, computes <c> (__A * __B) - __C </c>. 12106c3fb27SDimitry Andric /// 12206c3fb27SDimitry Andric /// \headerfile <immintrin.h> 12306c3fb27SDimitry Andric /// 12406c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMSUB213PS instruction. 12506c3fb27SDimitry Andric /// 12606c3fb27SDimitry Andric /// \param __A 12706c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand. 12806c3fb27SDimitry Andric /// \param __B 12906c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier. 13006c3fb27SDimitry Andric /// \param __C 13106c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the subtrahend. 13206c3fb27SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the result. 1330b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 1340b57cec5SDimitry Andric _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) 1350b57cec5SDimitry Andric { 1360b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 13906c3fb27SDimitry Andric /// Computes a multiply-subtract of 128-bit vectors of [2 x double]. 14006c3fb27SDimitry Andric /// For each element, computes <c> (__A * __B) - __C </c>. 14106c3fb27SDimitry Andric /// 14206c3fb27SDimitry Andric /// \headerfile <immintrin.h> 14306c3fb27SDimitry Andric /// 14406c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMSUB213PD instruction. 
14506c3fb27SDimitry Andric /// 14606c3fb27SDimitry Andric /// \param __A 14706c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand. 14806c3fb27SDimitry Andric /// \param __B 14906c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier. 15006c3fb27SDimitry Andric /// \param __C 15106c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the addend. 15206c3fb27SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the result. 1530b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 1540b57cec5SDimitry Andric _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) 1550b57cec5SDimitry Andric { 1560b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 15906c3fb27SDimitry Andric /// Computes a scalar multiply-subtract of the single-precision values in 16006c3fb27SDimitry Andric /// the low 32 bits of 128-bit vectors of [4 x float]. 161*0fca6ea1SDimitry Andric /// 162*0fca6ea1SDimitry Andric /// \code{.operation} 16306c3fb27SDimitry Andric /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] 16406c3fb27SDimitry Andric /// result[127:32] = __A[127:32] 16506c3fb27SDimitry Andric /// \endcode 16606c3fb27SDimitry Andric /// 16706c3fb27SDimitry Andric /// \headerfile <immintrin.h> 16806c3fb27SDimitry Andric /// 16906c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMSUB213SS instruction. 17006c3fb27SDimitry Andric /// 17106c3fb27SDimitry Andric /// \param __A 17206c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand in the low 17306c3fb27SDimitry Andric /// 32 bits. 17406c3fb27SDimitry Andric /// \param __B 17506c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier in the low 17606c3fb27SDimitry Andric /// 32 bits. 
17706c3fb27SDimitry Andric /// \param __C 17806c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the subtrahend in the low 17906c3fb27SDimitry Andric /// 32 bits. 18006c3fb27SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the result in the low 18106c3fb27SDimitry Andric /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. 1820b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 1830b57cec5SDimitry Andric _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) 1840b57cec5SDimitry Andric { 1850b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric 18806c3fb27SDimitry Andric /// Computes a scalar multiply-subtract of the double-precision values in 18906c3fb27SDimitry Andric /// the low 64 bits of 128-bit vectors of [2 x double]. 190*0fca6ea1SDimitry Andric /// 191*0fca6ea1SDimitry Andric /// \code{.operation} 19206c3fb27SDimitry Andric /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] 19306c3fb27SDimitry Andric /// result[127:64] = __A[127:64] 19406c3fb27SDimitry Andric /// \endcode 19506c3fb27SDimitry Andric /// 19606c3fb27SDimitry Andric /// \headerfile <immintrin.h> 19706c3fb27SDimitry Andric /// 19806c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMSUB213SD instruction. 19906c3fb27SDimitry Andric /// 20006c3fb27SDimitry Andric /// \param __A 20106c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand in the low 20206c3fb27SDimitry Andric /// 64 bits. 20306c3fb27SDimitry Andric /// \param __B 20406c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier in the low 20506c3fb27SDimitry Andric /// 64 bits. 20606c3fb27SDimitry Andric /// \param __C 20706c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend in the low 20806c3fb27SDimitry Andric /// 64 bits. 
20906c3fb27SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the result in the low 21006c3fb27SDimitry Andric /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. 2110b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 2120b57cec5SDimitry Andric _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) 2130b57cec5SDimitry Andric { 2140b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); 2150b57cec5SDimitry Andric } 2160b57cec5SDimitry Andric 21706c3fb27SDimitry Andric /// Computes a negated multiply-add of 128-bit vectors of [4 x float]. 21806c3fb27SDimitry Andric /// For each element, computes <c> -(__A * __B) + __C </c>. 21906c3fb27SDimitry Andric /// 22006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 22106c3fb27SDimitry Andric /// 22206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMADD213DPS instruction. 22306c3fb27SDimitry Andric /// 22406c3fb27SDimitry Andric /// \param __A 22506c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand. 22606c3fb27SDimitry Andric /// \param __B 22706c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier. 22806c3fb27SDimitry Andric /// \param __C 22906c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the addend. 23006c3fb27SDimitry Andric /// \returns A 128-bit [4 x float] vector containing the result. 2310b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 2320b57cec5SDimitry Andric _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) 2330b57cec5SDimitry Andric { 2340b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 2350b57cec5SDimitry Andric } 2360b57cec5SDimitry Andric 23706c3fb27SDimitry Andric /// Computes a negated multiply-add of 128-bit vectors of [2 x double]. 23806c3fb27SDimitry Andric /// For each element, computes <c> -(__A * __B) + __C </c>. 
23906c3fb27SDimitry Andric /// 24006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 24106c3fb27SDimitry Andric /// 24206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMADD213PD instruction. 24306c3fb27SDimitry Andric /// 24406c3fb27SDimitry Andric /// \param __A 24506c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand. 24606c3fb27SDimitry Andric /// \param __B 24706c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier. 24806c3fb27SDimitry Andric /// \param __C 24906c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the addend. 25006c3fb27SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the result. 2510b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 2520b57cec5SDimitry Andric _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) 2530b57cec5SDimitry Andric { 2540b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric 25706c3fb27SDimitry Andric /// Computes a scalar negated multiply-add of the single-precision values in 25806c3fb27SDimitry Andric /// the low 32 bits of 128-bit vectors of [4 x float]. 259*0fca6ea1SDimitry Andric /// 260*0fca6ea1SDimitry Andric /// \code{.operation} 26106c3fb27SDimitry Andric /// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0] 26206c3fb27SDimitry Andric /// result[127:32] = __A[127:32] 26306c3fb27SDimitry Andric /// \endcode 26406c3fb27SDimitry Andric /// 26506c3fb27SDimitry Andric /// \headerfile <immintrin.h> 26606c3fb27SDimitry Andric /// 26706c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMADD213SS instruction. 26806c3fb27SDimitry Andric /// 26906c3fb27SDimitry Andric /// \param __A 27006c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand in the low 27106c3fb27SDimitry Andric /// 32 bits. 
27206c3fb27SDimitry Andric /// \param __B 27306c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier in the low 27406c3fb27SDimitry Andric /// 32 bits. 27506c3fb27SDimitry Andric /// \param __C 27606c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the addend in the low 27706c3fb27SDimitry Andric /// 32 bits. 27806c3fb27SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the result in the low 27906c3fb27SDimitry Andric /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. 2800b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 2810b57cec5SDimitry Andric _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) 2820b57cec5SDimitry Andric { 2830b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric 28606c3fb27SDimitry Andric /// Computes a scalar negated multiply-add of the double-precision values 28706c3fb27SDimitry Andric /// in the low 64 bits of 128-bit vectors of [2 x double]. 288*0fca6ea1SDimitry Andric /// 289*0fca6ea1SDimitry Andric /// \code{.operation} 29006c3fb27SDimitry Andric /// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0] 29106c3fb27SDimitry Andric /// result[127:64] = __A[127:64] 29206c3fb27SDimitry Andric /// \endcode 29306c3fb27SDimitry Andric /// 29406c3fb27SDimitry Andric /// \headerfile <immintrin.h> 29506c3fb27SDimitry Andric /// 29606c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMADD213SD instruction. 29706c3fb27SDimitry Andric /// 29806c3fb27SDimitry Andric /// \param __A 29906c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand in the low 30006c3fb27SDimitry Andric /// 64 bits. 30106c3fb27SDimitry Andric /// \param __B 30206c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier in the low 30306c3fb27SDimitry Andric /// 64 bits. 
30406c3fb27SDimitry Andric /// \param __C 30506c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the addend in the low 30606c3fb27SDimitry Andric /// 64 bits. 30706c3fb27SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the result in the low 30806c3fb27SDimitry Andric /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. 3090b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 3100b57cec5SDimitry Andric _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) 3110b57cec5SDimitry Andric { 3120b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); 3130b57cec5SDimitry Andric } 3140b57cec5SDimitry Andric 31506c3fb27SDimitry Andric /// Computes a negated multiply-subtract of 128-bit vectors of [4 x float]. 31606c3fb27SDimitry Andric /// For each element, computes <c> -(__A * __B) - __C </c>. 31706c3fb27SDimitry Andric /// 31806c3fb27SDimitry Andric /// \headerfile <immintrin.h> 31906c3fb27SDimitry Andric /// 32006c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMSUB213PS instruction. 32106c3fb27SDimitry Andric /// 32206c3fb27SDimitry Andric /// \param __A 32306c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand. 32406c3fb27SDimitry Andric /// \param __B 32506c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier. 32606c3fb27SDimitry Andric /// \param __C 32706c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the subtrahend. 32806c3fb27SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the result. 
3290b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 3300b57cec5SDimitry Andric _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) 3310b57cec5SDimitry Andric { 3320b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 3330b57cec5SDimitry Andric } 3340b57cec5SDimitry Andric 33506c3fb27SDimitry Andric /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double]. 33606c3fb27SDimitry Andric /// For each element, computes <c> -(__A * __B) - __C </c>. 33706c3fb27SDimitry Andric /// 33806c3fb27SDimitry Andric /// \headerfile <immintrin.h> 33906c3fb27SDimitry Andric /// 34006c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMSUB213PD instruction. 34106c3fb27SDimitry Andric /// 34206c3fb27SDimitry Andric /// \param __A 34306c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand. 34406c3fb27SDimitry Andric /// \param __B 34506c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier. 34606c3fb27SDimitry Andric /// \param __C 34706c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend. 34806c3fb27SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the result. 3490b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 3500b57cec5SDimitry Andric _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) 3510b57cec5SDimitry Andric { 3520b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric 35506c3fb27SDimitry Andric /// Computes a scalar negated multiply-subtract of the single-precision 35606c3fb27SDimitry Andric /// values in the low 32 bits of 128-bit vectors of [4 x float]. 
357*0fca6ea1SDimitry Andric /// 358*0fca6ea1SDimitry Andric /// \code{.operation} 35906c3fb27SDimitry Andric /// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0] 36006c3fb27SDimitry Andric /// result[127:32] = __A[127:32] 36106c3fb27SDimitry Andric /// \endcode 36206c3fb27SDimitry Andric /// 36306c3fb27SDimitry Andric /// \headerfile <immintrin.h> 36406c3fb27SDimitry Andric /// 36506c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMSUB213SS instruction. 36606c3fb27SDimitry Andric /// 36706c3fb27SDimitry Andric /// \param __A 36806c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand in the low 36906c3fb27SDimitry Andric /// 32 bits. 37006c3fb27SDimitry Andric /// \param __B 37106c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier in the low 37206c3fb27SDimitry Andric /// 32 bits. 37306c3fb27SDimitry Andric /// \param __C 37406c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the subtrahend in the low 37506c3fb27SDimitry Andric /// 32 bits. 37606c3fb27SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the result in the low 37706c3fb27SDimitry Andric /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. 3780b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 3790b57cec5SDimitry Andric _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) 3800b57cec5SDimitry Andric { 3810b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); 3820b57cec5SDimitry Andric } 3830b57cec5SDimitry Andric 38406c3fb27SDimitry Andric /// Computes a scalar negated multiply-subtract of the double-precision 38506c3fb27SDimitry Andric /// values in the low 64 bits of 128-bit vectors of [2 x double]. 
386*0fca6ea1SDimitry Andric /// 387*0fca6ea1SDimitry Andric /// \code{.operation} 38806c3fb27SDimitry Andric /// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0] 38906c3fb27SDimitry Andric /// result[127:64] = __A[127:64] 39006c3fb27SDimitry Andric /// \endcode 39106c3fb27SDimitry Andric /// 39206c3fb27SDimitry Andric /// \headerfile <immintrin.h> 39306c3fb27SDimitry Andric /// 39406c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMSUB213SD instruction. 39506c3fb27SDimitry Andric /// 39606c3fb27SDimitry Andric /// \param __A 39706c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand in the low 39806c3fb27SDimitry Andric /// 64 bits. 39906c3fb27SDimitry Andric /// \param __B 40006c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier in the low 40106c3fb27SDimitry Andric /// 64 bits. 40206c3fb27SDimitry Andric /// \param __C 40306c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the subtrahend in the low 40406c3fb27SDimitry Andric /// 64 bits. 40506c3fb27SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the result in the low 40606c3fb27SDimitry Andric /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. 4070b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 4080b57cec5SDimitry Andric _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) 4090b57cec5SDimitry Andric { 4100b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); 4110b57cec5SDimitry Andric } 4120b57cec5SDimitry Andric 41306c3fb27SDimitry Andric /// Computes a multiply with alternating add/subtract of 128-bit vectors of 41406c3fb27SDimitry Andric /// [4 x float]. 
415*0fca6ea1SDimitry Andric /// 416*0fca6ea1SDimitry Andric /// \code{.operation} 41706c3fb27SDimitry Andric /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] 41806c3fb27SDimitry Andric /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32] 41906c3fb27SDimitry Andric /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64] 42006c3fb27SDimitry Andric /// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96] 42106c3fb27SDimitry Andric /// \endcode 42206c3fb27SDimitry Andric /// 42306c3fb27SDimitry Andric /// \headerfile <immintrin.h> 42406c3fb27SDimitry Andric /// 42506c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADDSUB213PS instruction. 42606c3fb27SDimitry Andric /// 42706c3fb27SDimitry Andric /// \param __A 42806c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand. 42906c3fb27SDimitry Andric /// \param __B 43006c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier. 43106c3fb27SDimitry Andric /// \param __C 43206c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the addend/subtrahend. 43306c3fb27SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the result. 4340b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 4350b57cec5SDimitry Andric _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) 4360b57cec5SDimitry Andric { 4370b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 4380b57cec5SDimitry Andric } 4390b57cec5SDimitry Andric 44006c3fb27SDimitry Andric /// Computes a multiply with alternating add/subtract of 128-bit vectors of 44106c3fb27SDimitry Andric /// [2 x double]. 
442*0fca6ea1SDimitry Andric /// 443*0fca6ea1SDimitry Andric /// \code{.operation} 44406c3fb27SDimitry Andric /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] 44506c3fb27SDimitry Andric /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64] 44606c3fb27SDimitry Andric /// \endcode 44706c3fb27SDimitry Andric /// 44806c3fb27SDimitry Andric /// \headerfile <immintrin.h> 44906c3fb27SDimitry Andric /// 45006c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. 45106c3fb27SDimitry Andric /// 45206c3fb27SDimitry Andric /// \param __A 45306c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand. 45406c3fb27SDimitry Andric /// \param __B 45506c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier. 45606c3fb27SDimitry Andric /// \param __C 45706c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the addend/subtrahend. 45806c3fb27SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the result. 4590b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 4600b57cec5SDimitry Andric _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) 4610b57cec5SDimitry Andric { 4620b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 4630b57cec5SDimitry Andric } 4640b57cec5SDimitry Andric 46506c3fb27SDimitry Andric /// Computes a multiply with alternating add/subtract of 128-bit vectors of 46606c3fb27SDimitry Andric /// [4 x float]. 
467*0fca6ea1SDimitry Andric /// 468*0fca6ea1SDimitry Andric /// \code{.operation} 46906c3fb27SDimitry Andric /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] 47006c3fb27SDimitry Andric /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32] 47106c3fb27SDimitry Andric /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64] 47206c3fb27SDimitry Andric /// result[127:96 = (__A[127:96] * __B[127:96]) - __C[127:96] 47306c3fb27SDimitry Andric /// \endcode 47406c3fb27SDimitry Andric /// 47506c3fb27SDimitry Andric /// \headerfile <immintrin.h> 47606c3fb27SDimitry Andric /// 47706c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMSUBADD213PS instruction. 47806c3fb27SDimitry Andric /// 47906c3fb27SDimitry Andric /// \param __A 48006c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplicand. 48106c3fb27SDimitry Andric /// \param __B 48206c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the multiplier. 48306c3fb27SDimitry Andric /// \param __C 48406c3fb27SDimitry Andric /// A 128-bit vector of [4 x float] containing the addend/subtrahend. 48506c3fb27SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the result. 4860b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 4870b57cec5SDimitry Andric _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) 4880b57cec5SDimitry Andric { 4890b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 4900b57cec5SDimitry Andric } 4910b57cec5SDimitry Andric 49206c3fb27SDimitry Andric /// Computes a multiply with alternating add/subtract of 128-bit vectors of 49306c3fb27SDimitry Andric /// [2 x double]. 
494*0fca6ea1SDimitry Andric /// 495*0fca6ea1SDimitry Andric /// \code{.operation} 49606c3fb27SDimitry Andric /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] 49706c3fb27SDimitry Andric /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64] 49806c3fb27SDimitry Andric /// \endcode 49906c3fb27SDimitry Andric /// 50006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 50106c3fb27SDimitry Andric /// 50206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. 50306c3fb27SDimitry Andric /// 50406c3fb27SDimitry Andric /// \param __A 50506c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplicand. 50606c3fb27SDimitry Andric /// \param __B 50706c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the multiplier. 50806c3fb27SDimitry Andric /// \param __C 50906c3fb27SDimitry Andric /// A 128-bit vector of [2 x double] containing the addend/subtrahend. 51006c3fb27SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the result. 5110b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 5120b57cec5SDimitry Andric _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) 5130b57cec5SDimitry Andric { 5140b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); 5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric 51706c3fb27SDimitry Andric /// Computes a multiply-add of 256-bit vectors of [8 x float]. 51806c3fb27SDimitry Andric /// For each element, computes <c> (__A * __B) + __C </c>. 51906c3fb27SDimitry Andric /// 52006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 52106c3fb27SDimitry Andric /// 52206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADD213PS instruction. 52306c3fb27SDimitry Andric /// 52406c3fb27SDimitry Andric /// \param __A 52506c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplicand. 
52606c3fb27SDimitry Andric /// \param __B 52706c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplier. 52806c3fb27SDimitry Andric /// \param __C 52906c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the addend. 53006c3fb27SDimitry Andric /// \returns A 256-bit vector of [8 x float] containing the result. 5310b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 5320b57cec5SDimitry Andric _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) 5330b57cec5SDimitry Andric { 5340b57cec5SDimitry Andric /* Builtin operands are passed in (multiplicand, multiplier, addend) order. */ return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric 53706c3fb27SDimitry Andric /// Computes a multiply-add of 256-bit vectors of [4 x double]. 53806c3fb27SDimitry Andric /// For each element, computes <c> (__A * __B) + __C </c>. 53906c3fb27SDimitry Andric /// 54006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 54106c3fb27SDimitry Andric /// 54206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADD213PD instruction. 54306c3fb27SDimitry Andric /// 54406c3fb27SDimitry Andric /// \param __A 54506c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplicand. 54606c3fb27SDimitry Andric /// \param __B 54706c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplier. 54806c3fb27SDimitry Andric /// \param __C 54906c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the addend. 55006c3fb27SDimitry Andric /// \returns A 256-bit vector of [4 x double] containing the result. 
5510b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 5520b57cec5SDimitry Andric _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) 5530b57cec5SDimitry Andric { 5540b57cec5SDimitry Andric /* Builtin operands are passed in (multiplicand, multiplier, addend) order. */ return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 5550b57cec5SDimitry Andric } 5560b57cec5SDimitry Andric 55706c3fb27SDimitry Andric /// Computes a multiply-subtract of 256-bit vectors of [8 x float]. 55806c3fb27SDimitry Andric /// For each element, computes <c> (__A * __B) - __C </c>. 55906c3fb27SDimitry Andric /// 56006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 56106c3fb27SDimitry Andric /// 56206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMSUB213PS instruction. 56306c3fb27SDimitry Andric /// 56406c3fb27SDimitry Andric /// \param __A 56506c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplicand. 56606c3fb27SDimitry Andric /// \param __B 56706c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplier. 56806c3fb27SDimitry Andric /// \param __C 56906c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the subtrahend. 57006c3fb27SDimitry Andric /// \returns A 256-bit vector of [8 x float] containing the result. 5710b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 5720b57cec5SDimitry Andric _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) 5730b57cec5SDimitry Andric { 5740b57cec5SDimitry Andric /* Subtraction is obtained by negating __C for the multiply-add builtin. */ return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); 5750b57cec5SDimitry Andric } 5760b57cec5SDimitry Andric 57706c3fb27SDimitry Andric /// Computes a multiply-subtract of 256-bit vectors of [4 x double]. 57806c3fb27SDimitry Andric /// For each element, computes <c> (__A * __B) - __C </c>. 
57906c3fb27SDimitry Andric /// 58006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 58106c3fb27SDimitry Andric /// 58206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMSUB213PD instruction. 58306c3fb27SDimitry Andric /// 58406c3fb27SDimitry Andric /// \param __A 58506c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplicand. 58606c3fb27SDimitry Andric /// \param __B 58706c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplier. 58806c3fb27SDimitry Andric /// \param __C 58906c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the subtrahend. 59006c3fb27SDimitry Andric /// \returns A 256-bit vector of [4 x double] containing the result. 5910b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 5920b57cec5SDimitry Andric _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) 5930b57cec5SDimitry Andric { 5940b57cec5SDimitry Andric /* Subtraction is obtained by negating __C for the multiply-add builtin. */ return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric 59706c3fb27SDimitry Andric /// Computes a negated multiply-add of 256-bit vectors of [8 x float]. 59806c3fb27SDimitry Andric /// For each element, computes <c> -(__A * __B) + __C </c>. 59906c3fb27SDimitry Andric /// 60006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 60106c3fb27SDimitry Andric /// 60206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMADD213PS instruction. 60306c3fb27SDimitry Andric /// 60406c3fb27SDimitry Andric /// \param __A 60506c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplicand. 60606c3fb27SDimitry Andric /// \param __B 60706c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplier. 60806c3fb27SDimitry Andric /// \param __C 60906c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the addend. 
61006c3fb27SDimitry Andric /// \returns A 256-bit vector of [8 x float] containing the result. 6110b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 6120b57cec5SDimitry Andric _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) 6130b57cec5SDimitry Andric { 6140b57cec5SDimitry Andric /* Negating __A negates the product: -(__A * __B) + __C. */ return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 6150b57cec5SDimitry Andric } 6160b57cec5SDimitry Andric 61706c3fb27SDimitry Andric /// Computes a negated multiply-add of 256-bit vectors of [4 x double]. 61806c3fb27SDimitry Andric /// For each element, computes <c> -(__A * __B) + __C </c>. 61906c3fb27SDimitry Andric /// 62006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 62106c3fb27SDimitry Andric /// 62206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMADD213PD instruction. 62306c3fb27SDimitry Andric /// 62406c3fb27SDimitry Andric /// \param __A 62506c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplicand. 62606c3fb27SDimitry Andric /// \param __B 62706c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplier. 62806c3fb27SDimitry Andric /// \param __C 62906c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the addend. 63006c3fb27SDimitry Andric /// \returns A 256-bit vector of [4 x double] containing the result. 6310b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 6320b57cec5SDimitry Andric _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) 6330b57cec5SDimitry Andric { 6340b57cec5SDimitry Andric /* Negating __A negates the product: -(__A * __B) + __C. */ return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); 6350b57cec5SDimitry Andric } 6360b57cec5SDimitry Andric 63706c3fb27SDimitry Andric /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float]. 63806c3fb27SDimitry Andric /// For each element, computes <c> -(__A * __B) - __C </c>. 
63906c3fb27SDimitry Andric /// 64006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 64106c3fb27SDimitry Andric /// 64206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMSUB213PS instruction. 64306c3fb27SDimitry Andric /// 64406c3fb27SDimitry Andric /// \param __A 64506c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplicand. 64606c3fb27SDimitry Andric /// \param __B 64706c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplier. 64806c3fb27SDimitry Andric /// \param __C 64906c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the subtrahend. 65006c3fb27SDimitry Andric /// \returns A 256-bit vector of [8 x float] containing the result. 6510b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 6520b57cec5SDimitry Andric _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) 6530b57cec5SDimitry Andric { 6540b57cec5SDimitry Andric /* Negating both __A and __C gives -(__A * __B) - __C via the multiply-add builtin. */ return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); 6550b57cec5SDimitry Andric } 6560b57cec5SDimitry Andric 65706c3fb27SDimitry Andric /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double]. 65806c3fb27SDimitry Andric /// For each element, computes <c> -(__A * __B) - __C </c>. 65906c3fb27SDimitry Andric /// 66006c3fb27SDimitry Andric /// \headerfile <immintrin.h> 66106c3fb27SDimitry Andric /// 66206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFNMSUB213PD instruction. 66306c3fb27SDimitry Andric /// 66406c3fb27SDimitry Andric /// \param __A 66506c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplicand. 66606c3fb27SDimitry Andric /// \param __B 66706c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplier. 66806c3fb27SDimitry Andric /// \param __C 66906c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the subtrahend. 
67006c3fb27SDimitry Andric /// \returns A 256-bit vector of [4 x double] containing the result. 6710b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 6720b57cec5SDimitry Andric _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) 6730b57cec5SDimitry Andric { 6740b57cec5SDimitry Andric /* Negating both __A and __C gives -(__A * __B) - __C via the multiply-add builtin. */ return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); 6750b57cec5SDimitry Andric } 6760b57cec5SDimitry Andric 67706c3fb27SDimitry Andric /// Computes a multiply with alternating add/subtract of 256-bit vectors of 67806c3fb27SDimitry Andric /// [8 x float]. 679*0fca6ea1SDimitry Andric /// 680*0fca6ea1SDimitry Andric /// \code{.operation} 68106c3fb27SDimitry Andric /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] 68206c3fb27SDimitry Andric /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32] 68306c3fb27SDimitry Andric /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64] 68406c3fb27SDimitry Andric /// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96] 68506c3fb27SDimitry Andric /// result[159:128] = (__A[159:128] * __B[159:128]) - __C[159:128] 68606c3fb27SDimitry Andric /// result[191:160] = (__A[191:160] * __B[191:160]) + __C[191:160] 68706c3fb27SDimitry Andric /// result[223:192] = (__A[223:192] * __B[223:192]) - __C[223:192] 68806c3fb27SDimitry Andric /// result[255:224] = (__A[255:224] * __B[255:224]) + __C[255:224] 68906c3fb27SDimitry Andric /// \endcode 69006c3fb27SDimitry Andric /// 69106c3fb27SDimitry Andric /// \headerfile <immintrin.h> 69206c3fb27SDimitry Andric /// 69306c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADDSUB213PS instruction. 69406c3fb27SDimitry Andric /// 69506c3fb27SDimitry Andric /// \param __A 69606c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplicand. 69706c3fb27SDimitry Andric /// \param __B 69806c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplier. 
69906c3fb27SDimitry Andric /// \param __C 70006c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the addend/subtrahend. 70106c3fb27SDimitry Andric /// \returns A 256-bit vector of [8 x float] containing the result. 7020b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 7030b57cec5SDimitry Andric _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) 7040b57cec5SDimitry Andric { 7050b57cec5SDimitry Andric /* No operand is negated; _mm256_fmsubadd_ps passes -__C to this same builtin. */ return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 7060b57cec5SDimitry Andric } 7070b57cec5SDimitry Andric 70806c3fb27SDimitry Andric /// Computes a multiply with alternating add/subtract of 256-bit vectors of 70906c3fb27SDimitry Andric /// [4 x double]. 710*0fca6ea1SDimitry Andric /// 711*0fca6ea1SDimitry Andric /// \code{.operation} 71206c3fb27SDimitry Andric /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] 71306c3fb27SDimitry Andric /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64] 71406c3fb27SDimitry Andric /// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128] 71506c3fb27SDimitry Andric /// result[255:192] = (__A[255:192] * __B[255:192]) + __C[255:192] 71606c3fb27SDimitry Andric /// \endcode 71706c3fb27SDimitry Andric /// 71806c3fb27SDimitry Andric /// \headerfile <immintrin.h> 71906c3fb27SDimitry Andric /// 72006c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. 72106c3fb27SDimitry Andric /// 72206c3fb27SDimitry Andric /// \param __A 72306c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplicand. 72406c3fb27SDimitry Andric /// \param __B 72506c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplier. 72606c3fb27SDimitry Andric /// \param __C 72706c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the addend/subtrahend. 72806c3fb27SDimitry Andric /// \returns A 256-bit vector of [4 x double] containing the result. 
7290b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 7300b57cec5SDimitry Andric _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) 7310b57cec5SDimitry Andric { 7320b57cec5SDimitry Andric /* No operand is negated; _mm256_fmsubadd_pd passes -__C to this same builtin. */ return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 7330b57cec5SDimitry Andric } 7340b57cec5SDimitry Andric 73506c3fb27SDimitry Andric /// Computes a vector multiply with alternating add/subtract of 256-bit 73606c3fb27SDimitry Andric /// vectors of [8 x float]. 737*0fca6ea1SDimitry Andric /// 738*0fca6ea1SDimitry Andric /// \code{.operation} 73906c3fb27SDimitry Andric /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] 74006c3fb27SDimitry Andric /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32] 74106c3fb27SDimitry Andric /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64] 74206c3fb27SDimitry Andric /// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96] 74306c3fb27SDimitry Andric /// result[159:128] = (__A[159:128] * __B[159:128]) + __C[159:128] 74406c3fb27SDimitry Andric /// result[191:160] = (__A[191:160] * __B[191:160]) - __C[191:160] 74506c3fb27SDimitry Andric /// result[223:192] = (__A[223:192] * __B[223:192]) + __C[223:192] 74606c3fb27SDimitry Andric /// result[255:224] = (__A[255:224] * __B[255:224]) - __C[255:224] 74706c3fb27SDimitry Andric /// \endcode 74806c3fb27SDimitry Andric /// 74906c3fb27SDimitry Andric /// \headerfile <immintrin.h> 75006c3fb27SDimitry Andric /// 75106c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMSUBADD213PS instruction. 75206c3fb27SDimitry Andric /// 75306c3fb27SDimitry Andric /// \param __A 75406c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplicand. 75506c3fb27SDimitry Andric /// \param __B 75606c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the multiplier. 
75706c3fb27SDimitry Andric /// \param __C 75806c3fb27SDimitry Andric /// A 256-bit vector of [8 x float] containing the addend/subtrahend. 75906c3fb27SDimitry Andric /// \returns A 256-bit vector of [8 x float] containing the result. 7600b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 7610b57cec5SDimitry Andric _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) 7620b57cec5SDimitry Andric { 7630b57cec5SDimitry Andric /* Negating __C swaps the add and subtract lanes of the fmaddsub builtin. */ return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); 7640b57cec5SDimitry Andric } 7650b57cec5SDimitry Andric 76606c3fb27SDimitry Andric /// Computes a vector multiply with alternating add/subtract of 256-bit 76706c3fb27SDimitry Andric /// vectors of [4 x double]. 768*0fca6ea1SDimitry Andric /// 769*0fca6ea1SDimitry Andric /// \code{.operation} 77006c3fb27SDimitry Andric /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] 77106c3fb27SDimitry Andric /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64] 77206c3fb27SDimitry Andric /// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128] 77306c3fb27SDimitry Andric /// result[255:192] = (__A[255:192] * __B[255:192]) - __C[255:192] 77406c3fb27SDimitry Andric /// \endcode 77506c3fb27SDimitry Andric /// 77606c3fb27SDimitry Andric /// \headerfile <immintrin.h> 77706c3fb27SDimitry Andric /// 77806c3fb27SDimitry Andric /// This intrinsic corresponds to the \c VFMSUBADD213PD instruction. 77906c3fb27SDimitry Andric /// 78006c3fb27SDimitry Andric /// \param __A 78106c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplicand. 78206c3fb27SDimitry Andric /// \param __B 78306c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the multiplier. 78406c3fb27SDimitry Andric /// \param __C 78506c3fb27SDimitry Andric /// A 256-bit vector of [4 x double] containing the addend/subtrahend. 78606c3fb27SDimitry Andric /// \returns A 256-bit vector of [4 x double] containing the result. 
7870b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 7880b57cec5SDimitry Andric _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) 7890b57cec5SDimitry Andric { 7900b57cec5SDimitry Andric /* Negating __C swaps the add and subtract lanes of the fmaddsub builtin. */ return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); 7910b57cec5SDimitry Andric } 7920b57cec5SDimitry Andric 7930b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 7940b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 7950b57cec5SDimitry Andric 7960b57cec5SDimitry Andric #endif /* __FMAINTRIN_H */ 797