/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __FMAINTRIN_H
#define __FMAINTRIN_H

/* Define the default attributes for the functions in this file.
 *
 * Both variants force inlining, suppress debug info for the wrapper, and
 * enable the "fma" target feature for the function body. They differ only in
 * the minimum vector width they request: 128 bits for the __m128/__m128d
 * intrinsics and 256 bits for the __m256/__m256d intrinsics.
 */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))

21*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmadd_ps(__m128 __A,__m128 __B,__m128 __C)22*e5dd7070Spatrick _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
23*e5dd7070Spatrick {
24*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
25*e5dd7070Spatrick }
26*e5dd7070Spatrick 
27*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmadd_pd(__m128d __A,__m128d __B,__m128d __C)28*e5dd7070Spatrick _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
29*e5dd7070Spatrick {
30*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
31*e5dd7070Spatrick }
32*e5dd7070Spatrick 
33*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmadd_ss(__m128 __A,__m128 __B,__m128 __C)34*e5dd7070Spatrick _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
35*e5dd7070Spatrick {
36*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
37*e5dd7070Spatrick }
38*e5dd7070Spatrick 
39*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmadd_sd(__m128d __A,__m128d __B,__m128d __C)40*e5dd7070Spatrick _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
41*e5dd7070Spatrick {
42*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
43*e5dd7070Spatrick }
44*e5dd7070Spatrick 
45*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsub_ps(__m128 __A,__m128 __B,__m128 __C)46*e5dd7070Spatrick _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
47*e5dd7070Spatrick {
48*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
49*e5dd7070Spatrick }
50*e5dd7070Spatrick 
51*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsub_pd(__m128d __A,__m128d __B,__m128d __C)52*e5dd7070Spatrick _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
53*e5dd7070Spatrick {
54*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
55*e5dd7070Spatrick }
56*e5dd7070Spatrick 
57*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsub_ss(__m128 __A,__m128 __B,__m128 __C)58*e5dd7070Spatrick _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
59*e5dd7070Spatrick {
60*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
61*e5dd7070Spatrick }
62*e5dd7070Spatrick 
63*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsub_sd(__m128d __A,__m128d __B,__m128d __C)64*e5dd7070Spatrick _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
65*e5dd7070Spatrick {
66*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
67*e5dd7070Spatrick }
68*e5dd7070Spatrick 
69*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmadd_ps(__m128 __A,__m128 __B,__m128 __C)70*e5dd7070Spatrick _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
71*e5dd7070Spatrick {
72*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
73*e5dd7070Spatrick }
74*e5dd7070Spatrick 
75*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmadd_pd(__m128d __A,__m128d __B,__m128d __C)76*e5dd7070Spatrick _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
77*e5dd7070Spatrick {
78*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
79*e5dd7070Spatrick }
80*e5dd7070Spatrick 
81*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmadd_ss(__m128 __A,__m128 __B,__m128 __C)82*e5dd7070Spatrick _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
83*e5dd7070Spatrick {
84*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
85*e5dd7070Spatrick }
86*e5dd7070Spatrick 
87*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmadd_sd(__m128d __A,__m128d __B,__m128d __C)88*e5dd7070Spatrick _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
89*e5dd7070Spatrick {
90*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
91*e5dd7070Spatrick }
92*e5dd7070Spatrick 
93*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmsub_ps(__m128 __A,__m128 __B,__m128 __C)94*e5dd7070Spatrick _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
95*e5dd7070Spatrick {
96*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
97*e5dd7070Spatrick }
98*e5dd7070Spatrick 
99*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmsub_pd(__m128d __A,__m128d __B,__m128d __C)100*e5dd7070Spatrick _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
101*e5dd7070Spatrick {
102*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
103*e5dd7070Spatrick }
104*e5dd7070Spatrick 
105*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmsub_ss(__m128 __A,__m128 __B,__m128 __C)106*e5dd7070Spatrick _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
107*e5dd7070Spatrick {
108*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
109*e5dd7070Spatrick }
110*e5dd7070Spatrick 
111*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmsub_sd(__m128d __A,__m128d __B,__m128d __C)112*e5dd7070Spatrick _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
113*e5dd7070Spatrick {
114*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
115*e5dd7070Spatrick }
116*e5dd7070Spatrick 
117*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmaddsub_ps(__m128 __A,__m128 __B,__m128 __C)118*e5dd7070Spatrick _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
119*e5dd7070Spatrick {
120*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
121*e5dd7070Spatrick }
122*e5dd7070Spatrick 
123*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmaddsub_pd(__m128d __A,__m128d __B,__m128d __C)124*e5dd7070Spatrick _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
125*e5dd7070Spatrick {
126*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
127*e5dd7070Spatrick }
128*e5dd7070Spatrick 
129*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsubadd_ps(__m128 __A,__m128 __B,__m128 __C)130*e5dd7070Spatrick _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
131*e5dd7070Spatrick {
132*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
133*e5dd7070Spatrick }
134*e5dd7070Spatrick 
135*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsubadd_pd(__m128d __A,__m128d __B,__m128d __C)136*e5dd7070Spatrick _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
137*e5dd7070Spatrick {
138*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
139*e5dd7070Spatrick }
140*e5dd7070Spatrick 
141*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmadd_ps(__m256 __A,__m256 __B,__m256 __C)142*e5dd7070Spatrick _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
143*e5dd7070Spatrick {
144*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
145*e5dd7070Spatrick }
146*e5dd7070Spatrick 
147*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmadd_pd(__m256d __A,__m256d __B,__m256d __C)148*e5dd7070Spatrick _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
149*e5dd7070Spatrick {
150*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
151*e5dd7070Spatrick }
152*e5dd7070Spatrick 
153*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmsub_ps(__m256 __A,__m256 __B,__m256 __C)154*e5dd7070Spatrick _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
155*e5dd7070Spatrick {
156*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
157*e5dd7070Spatrick }
158*e5dd7070Spatrick 
159*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmsub_pd(__m256d __A,__m256d __B,__m256d __C)160*e5dd7070Spatrick _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
161*e5dd7070Spatrick {
162*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
163*e5dd7070Spatrick }
164*e5dd7070Spatrick 
165*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fnmadd_ps(__m256 __A,__m256 __B,__m256 __C)166*e5dd7070Spatrick _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
167*e5dd7070Spatrick {
168*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
169*e5dd7070Spatrick }
170*e5dd7070Spatrick 
171*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fnmadd_pd(__m256d __A,__m256d __B,__m256d __C)172*e5dd7070Spatrick _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
173*e5dd7070Spatrick {
174*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
175*e5dd7070Spatrick }
176*e5dd7070Spatrick 
177*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fnmsub_ps(__m256 __A,__m256 __B,__m256 __C)178*e5dd7070Spatrick _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
179*e5dd7070Spatrick {
180*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
181*e5dd7070Spatrick }
182*e5dd7070Spatrick 
183*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fnmsub_pd(__m256d __A,__m256d __B,__m256d __C)184*e5dd7070Spatrick _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
185*e5dd7070Spatrick {
186*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
187*e5dd7070Spatrick }
188*e5dd7070Spatrick 
189*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmaddsub_ps(__m256 __A,__m256 __B,__m256 __C)190*e5dd7070Spatrick _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
191*e5dd7070Spatrick {
192*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
193*e5dd7070Spatrick }
194*e5dd7070Spatrick 
195*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmaddsub_pd(__m256d __A,__m256d __B,__m256d __C)196*e5dd7070Spatrick _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
197*e5dd7070Spatrick {
198*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
199*e5dd7070Spatrick }
200*e5dd7070Spatrick 
201*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmsubadd_ps(__m256 __A,__m256 __B,__m256 __C)202*e5dd7070Spatrick _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
203*e5dd7070Spatrick {
204*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
205*e5dd7070Spatrick }
206*e5dd7070Spatrick 
207*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmsubadd_pd(__m256d __A,__m256d __B,__m256d __C)208*e5dd7070Spatrick _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
209*e5dd7070Spatrick {
210*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
211*e5dd7070Spatrick }
212*e5dd7070Spatrick 
/* Remove the helper attribute macros so they do not leak past this point. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif /* __FMAINTRIN_H */