xref: /dflybsd-src/contrib/gcc-4.7/gcc/config/i386/fma4intrin.h (revision 04febcfb30580676d3e95f58a16c5137ee478b32)
/* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
23*e4b17023SJohn Marino 
24*e4b17023SJohn Marino #ifndef _X86INTRIN_H_INCLUDED
25*e4b17023SJohn Marino # error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
26*e4b17023SJohn Marino #endif
27*e4b17023SJohn Marino 
28*e4b17023SJohn Marino #ifndef _FMA4INTRIN_H_INCLUDED
29*e4b17023SJohn Marino #define _FMA4INTRIN_H_INCLUDED
30*e4b17023SJohn Marino 
31*e4b17023SJohn Marino #ifndef __FMA4__
32*e4b17023SJohn Marino # error "FMA4 instruction set not enabled"
33*e4b17023SJohn Marino #else
34*e4b17023SJohn Marino 
35*e4b17023SJohn Marino /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files.  */
36*e4b17023SJohn Marino #include <ammintrin.h>
37*e4b17023SJohn Marino 
/* 128b Floating point multiply/add type instructions.  */
39*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ps(__m128 __A,__m128 __B,__m128 __C)40*e4b17023SJohn Marino _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
41*e4b17023SJohn Marino {
42*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
43*e4b17023SJohn Marino }
44*e4b17023SJohn Marino 
45*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_pd(__m128d __A,__m128d __B,__m128d __C)46*e4b17023SJohn Marino _mm_macc_pd (__m128d __A, __m128d __B, __m128d __C)
47*e4b17023SJohn Marino {
48*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
49*e4b17023SJohn Marino }
50*e4b17023SJohn Marino 
51*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ss(__m128 __A,__m128 __B,__m128 __C)52*e4b17023SJohn Marino _mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
53*e4b17023SJohn Marino {
54*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
55*e4b17023SJohn Marino }
56*e4b17023SJohn Marino 
57*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_sd(__m128d __A,__m128d __B,__m128d __C)58*e4b17023SJohn Marino _mm_macc_sd (__m128d __A, __m128d __B, __m128d __C)
59*e4b17023SJohn Marino {
60*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
61*e4b17023SJohn Marino }
62*e4b17023SJohn Marino 
63*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ps(__m128 __A,__m128 __B,__m128 __C)64*e4b17023SJohn Marino _mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
65*e4b17023SJohn Marino 
66*e4b17023SJohn Marino {
67*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
68*e4b17023SJohn Marino }
69*e4b17023SJohn Marino 
70*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_pd(__m128d __A,__m128d __B,__m128d __C)71*e4b17023SJohn Marino _mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
72*e4b17023SJohn Marino {
73*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
74*e4b17023SJohn Marino }
75*e4b17023SJohn Marino 
76*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_ss(__m128 __A,__m128 __B,__m128 __C)77*e4b17023SJohn Marino _mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
78*e4b17023SJohn Marino {
79*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
80*e4b17023SJohn Marino }
81*e4b17023SJohn Marino 
82*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msub_sd(__m128d __A,__m128d __B,__m128d __C)83*e4b17023SJohn Marino _mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
84*e4b17023SJohn Marino {
85*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
86*e4b17023SJohn Marino }
87*e4b17023SJohn Marino 
88*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ps(__m128 __A,__m128 __B,__m128 __C)89*e4b17023SJohn Marino _mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
90*e4b17023SJohn Marino {
91*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
92*e4b17023SJohn Marino }
93*e4b17023SJohn Marino 
94*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_pd(__m128d __A,__m128d __B,__m128d __C)95*e4b17023SJohn Marino _mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
96*e4b17023SJohn Marino {
97*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
98*e4b17023SJohn Marino }
99*e4b17023SJohn Marino 
100*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_ss(__m128 __A,__m128 __B,__m128 __C)101*e4b17023SJohn Marino _mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
102*e4b17023SJohn Marino {
103*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
104*e4b17023SJohn Marino }
105*e4b17023SJohn Marino 
106*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmacc_sd(__m128d __A,__m128d __B,__m128d __C)107*e4b17023SJohn Marino _mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
108*e4b17023SJohn Marino {
109*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
110*e4b17023SJohn Marino }
111*e4b17023SJohn Marino 
112*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ps(__m128 __A,__m128 __B,__m128 __C)113*e4b17023SJohn Marino _mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
114*e4b17023SJohn Marino {
115*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
116*e4b17023SJohn Marino }
117*e4b17023SJohn Marino 
118*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_pd(__m128d __A,__m128d __B,__m128d __C)119*e4b17023SJohn Marino _mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
120*e4b17023SJohn Marino {
121*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
122*e4b17023SJohn Marino }
123*e4b17023SJohn Marino 
124*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ss(__m128 __A,__m128 __B,__m128 __C)125*e4b17023SJohn Marino _mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
126*e4b17023SJohn Marino {
127*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
128*e4b17023SJohn Marino }
129*e4b17023SJohn Marino 
130*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_sd(__m128d __A,__m128d __B,__m128d __C)131*e4b17023SJohn Marino _mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
132*e4b17023SJohn Marino {
133*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
134*e4b17023SJohn Marino }
135*e4b17023SJohn Marino 
136*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_ps(__m128 __A,__m128 __B,__m128 __C)137*e4b17023SJohn Marino _mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C)
138*e4b17023SJohn Marino {
139*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
140*e4b17023SJohn Marino }
141*e4b17023SJohn Marino 
142*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_pd(__m128d __A,__m128d __B,__m128d __C)143*e4b17023SJohn Marino _mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
144*e4b17023SJohn Marino {
145*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
146*e4b17023SJohn Marino }
147*e4b17023SJohn Marino 
148*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_ps(__m128 __A,__m128 __B,__m128 __C)149*e4b17023SJohn Marino _mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
150*e4b17023SJohn Marino {
151*e4b17023SJohn Marino   return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
152*e4b17023SJohn Marino }
153*e4b17023SJohn Marino 
154*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_pd(__m128d __A,__m128d __B,__m128d __C)155*e4b17023SJohn Marino _mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
156*e4b17023SJohn Marino {
157*e4b17023SJohn Marino   return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
158*e4b17023SJohn Marino }
159*e4b17023SJohn Marino 
/* 256b Floating point multiply/add type instructions.  */
161*e4b17023SJohn Marino extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_ps(__m256 __A,__m256 __B,__m256 __C)162*e4b17023SJohn Marino _mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
163*e4b17023SJohn Marino {
164*e4b17023SJohn Marino   return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
165*e4b17023SJohn Marino }
166*e4b17023SJohn Marino 
167*e4b17023SJohn Marino extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_pd(__m256d __A,__m256d __B,__m256d __C)168*e4b17023SJohn Marino _mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
169*e4b17023SJohn Marino {
170*e4b17023SJohn Marino   return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
171*e4b17023SJohn Marino }
172*e4b17023SJohn Marino 
173*e4b17023SJohn Marino extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_ps(__m256 __A,__m256 __B,__m256 __C)174*e4b17023SJohn Marino _mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
175*e4b17023SJohn Marino 
176*e4b17023SJohn Marino {
177*e4b17023SJohn Marino   return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
178*e4b17023SJohn Marino }
179*e4b17023SJohn Marino 
180*e4b17023SJohn Marino extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_pd(__m256d __A,__m256d __B,__m256d __C)181*e4b17023SJohn Marino _mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
182*e4b17023SJohn Marino {
183*e4b17023SJohn Marino   return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
184*e4b17023SJohn Marino }
185*e4b17023SJohn Marino 
186*e4b17023SJohn Marino extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_ps(__m256 __A,__m256 __B,__m256 __C)187*e4b17023SJohn Marino _mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
188*e4b17023SJohn Marino {
189*e4b17023SJohn Marino   return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
190*e4b17023SJohn Marino }
191*e4b17023SJohn Marino 
192*e4b17023SJohn Marino extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_pd(__m256d __A,__m256d __B,__m256d __C)193*e4b17023SJohn Marino _mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
194*e4b17023SJohn Marino {
195*e4b17023SJohn Marino   return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
196*e4b17023SJohn Marino }
197*e4b17023SJohn Marino 
198*e4b17023SJohn Marino extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_ps(__m256 __A,__m256 __B,__m256 __C)199*e4b17023SJohn Marino _mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
200*e4b17023SJohn Marino {
201*e4b17023SJohn Marino   return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
202*e4b17023SJohn Marino }
203*e4b17023SJohn Marino 
204*e4b17023SJohn Marino extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_pd(__m256d __A,__m256d __B,__m256d __C)205*e4b17023SJohn Marino _mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
206*e4b17023SJohn Marino {
207*e4b17023SJohn Marino   return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
208*e4b17023SJohn Marino }
209*e4b17023SJohn Marino 
210*e4b17023SJohn Marino extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_ps(__m256 __A,__m256 __B,__m256 __C)211*e4b17023SJohn Marino _mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
212*e4b17023SJohn Marino {
213*e4b17023SJohn Marino   return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
214*e4b17023SJohn Marino }
215*e4b17023SJohn Marino 
216*e4b17023SJohn Marino extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_pd(__m256d __A,__m256d __B,__m256d __C)217*e4b17023SJohn Marino _mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
218*e4b17023SJohn Marino {
219*e4b17023SJohn Marino   return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
220*e4b17023SJohn Marino }
221*e4b17023SJohn Marino 
222*e4b17023SJohn Marino extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_ps(__m256 __A,__m256 __B,__m256 __C)223*e4b17023SJohn Marino _mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
224*e4b17023SJohn Marino {
225*e4b17023SJohn Marino   return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
226*e4b17023SJohn Marino }
227*e4b17023SJohn Marino 
228*e4b17023SJohn Marino extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_pd(__m256d __A,__m256d __B,__m256d __C)229*e4b17023SJohn Marino _mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
230*e4b17023SJohn Marino {
231*e4b17023SJohn Marino   return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
232*e4b17023SJohn Marino }
233*e4b17023SJohn Marino 
234*e4b17023SJohn Marino #endif
235*e4b17023SJohn Marino 
236*e4b17023SJohn Marino #endif
237