/* xref: /openbsd-src/gnu/llvm/clang/lib/Headers/fma4intrin.h (revision e5dd70708596ae51455a0ffa086a00c5b29f8583) */
/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

10*e5dd7070Spatrick #ifndef __X86INTRIN_H
11*e5dd7070Spatrick #error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
12*e5dd7070Spatrick #endif
13*e5dd7070Spatrick 
14*e5dd7070Spatrick #ifndef __FMA4INTRIN_H
15*e5dd7070Spatrick #define __FMA4INTRIN_H
16*e5dd7070Spatrick 
17*e5dd7070Spatrick #include <pmmintrin.h>
18*e5dd7070Spatrick 
/* Define the default attributes for the functions in this file.
 *
 * Both macros apply __always_inline__, __nodebug__ and __target__("fma4");
 * they differ only in the __min_vector_width__ argument (128 vs. 256).
 * Both are #undef'd again at the end of this header.
 */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256)))

23*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_macc_ps(__m128 __A,__m128 __B,__m128 __C)24*e5dd7070Spatrick _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
25*e5dd7070Spatrick {
26*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
27*e5dd7070Spatrick }
28*e5dd7070Spatrick 
29*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_macc_pd(__m128d __A,__m128d __B,__m128d __C)30*e5dd7070Spatrick _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
31*e5dd7070Spatrick {
32*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
33*e5dd7070Spatrick }
34*e5dd7070Spatrick 
35*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_macc_ss(__m128 __A,__m128 __B,__m128 __C)36*e5dd7070Spatrick _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
37*e5dd7070Spatrick {
38*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
39*e5dd7070Spatrick }
40*e5dd7070Spatrick 
41*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_macc_sd(__m128d __A,__m128d __B,__m128d __C)42*e5dd7070Spatrick _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
43*e5dd7070Spatrick {
44*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
45*e5dd7070Spatrick }
46*e5dd7070Spatrick 
47*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_msub_ps(__m128 __A,__m128 __B,__m128 __C)48*e5dd7070Spatrick _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
49*e5dd7070Spatrick {
50*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
51*e5dd7070Spatrick }
52*e5dd7070Spatrick 
53*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_msub_pd(__m128d __A,__m128d __B,__m128d __C)54*e5dd7070Spatrick _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
55*e5dd7070Spatrick {
56*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
57*e5dd7070Spatrick }
58*e5dd7070Spatrick 
59*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_msub_ss(__m128 __A,__m128 __B,__m128 __C)60*e5dd7070Spatrick _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
61*e5dd7070Spatrick {
62*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
63*e5dd7070Spatrick }
64*e5dd7070Spatrick 
65*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_msub_sd(__m128d __A,__m128d __B,__m128d __C)66*e5dd7070Spatrick _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
67*e5dd7070Spatrick {
68*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
69*e5dd7070Spatrick }
70*e5dd7070Spatrick 
71*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmacc_ps(__m128 __A,__m128 __B,__m128 __C)72*e5dd7070Spatrick _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
73*e5dd7070Spatrick {
74*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
75*e5dd7070Spatrick }
76*e5dd7070Spatrick 
77*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmacc_pd(__m128d __A,__m128d __B,__m128d __C)78*e5dd7070Spatrick _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
79*e5dd7070Spatrick {
80*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
81*e5dd7070Spatrick }
82*e5dd7070Spatrick 
83*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmacc_ss(__m128 __A,__m128 __B,__m128 __C)84*e5dd7070Spatrick _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
85*e5dd7070Spatrick {
86*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
87*e5dd7070Spatrick }
88*e5dd7070Spatrick 
89*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmacc_sd(__m128d __A,__m128d __B,__m128d __C)90*e5dd7070Spatrick _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
91*e5dd7070Spatrick {
92*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
93*e5dd7070Spatrick }
94*e5dd7070Spatrick 
95*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmsub_ps(__m128 __A,__m128 __B,__m128 __C)96*e5dd7070Spatrick _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
97*e5dd7070Spatrick {
98*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
99*e5dd7070Spatrick }
100*e5dd7070Spatrick 
101*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmsub_pd(__m128d __A,__m128d __B,__m128d __C)102*e5dd7070Spatrick _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
103*e5dd7070Spatrick {
104*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
105*e5dd7070Spatrick }
106*e5dd7070Spatrick 
107*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmsub_ss(__m128 __A,__m128 __B,__m128 __C)108*e5dd7070Spatrick _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
109*e5dd7070Spatrick {
110*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
111*e5dd7070Spatrick }
112*e5dd7070Spatrick 
113*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmsub_sd(__m128d __A,__m128d __B,__m128d __C)114*e5dd7070Spatrick _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
115*e5dd7070Spatrick {
116*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
117*e5dd7070Spatrick }
118*e5dd7070Spatrick 
119*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maddsub_ps(__m128 __A,__m128 __B,__m128 __C)120*e5dd7070Spatrick _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
121*e5dd7070Spatrick {
122*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
123*e5dd7070Spatrick }
124*e5dd7070Spatrick 
125*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maddsub_pd(__m128d __A,__m128d __B,__m128d __C)126*e5dd7070Spatrick _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
127*e5dd7070Spatrick {
128*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
129*e5dd7070Spatrick }
130*e5dd7070Spatrick 
131*e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_msubadd_ps(__m128 __A,__m128 __B,__m128 __C)132*e5dd7070Spatrick _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
133*e5dd7070Spatrick {
134*e5dd7070Spatrick   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
135*e5dd7070Spatrick }
136*e5dd7070Spatrick 
137*e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_msubadd_pd(__m128d __A,__m128d __B,__m128d __C)138*e5dd7070Spatrick _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
139*e5dd7070Spatrick {
140*e5dd7070Spatrick   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
141*e5dd7070Spatrick }
142*e5dd7070Spatrick 
143*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_macc_ps(__m256 __A,__m256 __B,__m256 __C)144*e5dd7070Spatrick _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
145*e5dd7070Spatrick {
146*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
147*e5dd7070Spatrick }
148*e5dd7070Spatrick 
149*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_macc_pd(__m256d __A,__m256d __B,__m256d __C)150*e5dd7070Spatrick _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
151*e5dd7070Spatrick {
152*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
153*e5dd7070Spatrick }
154*e5dd7070Spatrick 
155*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_msub_ps(__m256 __A,__m256 __B,__m256 __C)156*e5dd7070Spatrick _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
157*e5dd7070Spatrick {
158*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
159*e5dd7070Spatrick }
160*e5dd7070Spatrick 
161*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_msub_pd(__m256d __A,__m256d __B,__m256d __C)162*e5dd7070Spatrick _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
163*e5dd7070Spatrick {
164*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
165*e5dd7070Spatrick }
166*e5dd7070Spatrick 
167*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_nmacc_ps(__m256 __A,__m256 __B,__m256 __C)168*e5dd7070Spatrick _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
169*e5dd7070Spatrick {
170*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
171*e5dd7070Spatrick }
172*e5dd7070Spatrick 
173*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_nmacc_pd(__m256d __A,__m256d __B,__m256d __C)174*e5dd7070Spatrick _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
175*e5dd7070Spatrick {
176*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
177*e5dd7070Spatrick }
178*e5dd7070Spatrick 
179*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_nmsub_ps(__m256 __A,__m256 __B,__m256 __C)180*e5dd7070Spatrick _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
181*e5dd7070Spatrick {
182*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
183*e5dd7070Spatrick }
184*e5dd7070Spatrick 
185*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_nmsub_pd(__m256d __A,__m256d __B,__m256d __C)186*e5dd7070Spatrick _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
187*e5dd7070Spatrick {
188*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
189*e5dd7070Spatrick }
190*e5dd7070Spatrick 
191*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maddsub_ps(__m256 __A,__m256 __B,__m256 __C)192*e5dd7070Spatrick _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
193*e5dd7070Spatrick {
194*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
195*e5dd7070Spatrick }
196*e5dd7070Spatrick 
197*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maddsub_pd(__m256d __A,__m256d __B,__m256d __C)198*e5dd7070Spatrick _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
199*e5dd7070Spatrick {
200*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
201*e5dd7070Spatrick }
202*e5dd7070Spatrick 
203*e5dd7070Spatrick static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_msubadd_ps(__m256 __A,__m256 __B,__m256 __C)204*e5dd7070Spatrick _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
205*e5dd7070Spatrick {
206*e5dd7070Spatrick   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
207*e5dd7070Spatrick }
208*e5dd7070Spatrick 
209*e5dd7070Spatrick static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_msubadd_pd(__m256d __A,__m256d __B,__m256d __C)210*e5dd7070Spatrick _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
211*e5dd7070Spatrick {
212*e5dd7070Spatrick   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
213*e5dd7070Spatrick }
214*e5dd7070Spatrick 
215*e5dd7070Spatrick #undef __DEFAULT_FN_ATTRS128
216*e5dd7070Spatrick #undef __DEFAULT_FN_ATTRS256
217*e5dd7070Spatrick 
218*e5dd7070Spatrick #endif /* __FMA4INTRIN_H */
219