xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/mm3dnow.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
/* Copyright (C) 2004-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the mm3dnow.h (of supposedly AMD origin) included with
   MSVC 7.1.  */

27*38fd1498Szrj #ifndef _MM3DNOW_H_INCLUDED
28*38fd1498Szrj #define _MM3DNOW_H_INCLUDED
29*38fd1498Szrj 
30*38fd1498Szrj #include <mmintrin.h>
31*38fd1498Szrj #include <prfchwintrin.h>
32*38fd1498Szrj 
33*38fd1498Szrj #if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW__
34*38fd1498Szrj #pragma GCC push_options
35*38fd1498Szrj #ifdef __x86_64__
36*38fd1498Szrj #pragma GCC target("sse,3dnow")
37*38fd1498Szrj #else
38*38fd1498Szrj #pragma GCC target("3dnow")
39*38fd1498Szrj #endif
40*38fd1498Szrj #define __DISABLE_3dNOW__
41*38fd1498Szrj #endif /* __3dNOW__ */
42*38fd1498Szrj 
/* Issue the 3DNow! FEMMS builtin.  It takes no operands and yields no
   value, so it is called only for its effect on processor state; see the
   AMD 3DNow! Technology Manual for the instruction's semantics.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_femms (void)
{
  __builtin_ia32_femms();
}

49*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgusb(__m64 __A,__m64 __B)50*38fd1498Szrj _m_pavgusb (__m64 __A, __m64 __B)
51*38fd1498Szrj {
52*38fd1498Szrj   return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
53*38fd1498Szrj }
54*38fd1498Szrj 
55*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2id(__m64 __A)56*38fd1498Szrj _m_pf2id (__m64 __A)
57*38fd1498Szrj {
58*38fd1498Szrj   return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
59*38fd1498Szrj }
60*38fd1498Szrj 
61*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfacc(__m64 __A,__m64 __B)62*38fd1498Szrj _m_pfacc (__m64 __A, __m64 __B)
63*38fd1498Szrj {
64*38fd1498Szrj   return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
65*38fd1498Szrj }
66*38fd1498Szrj 
67*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfadd(__m64 __A,__m64 __B)68*38fd1498Szrj _m_pfadd (__m64 __A, __m64 __B)
69*38fd1498Szrj {
70*38fd1498Szrj   return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
71*38fd1498Szrj }
72*38fd1498Szrj 
73*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpeq(__m64 __A,__m64 __B)74*38fd1498Szrj _m_pfcmpeq (__m64 __A, __m64 __B)
75*38fd1498Szrj {
76*38fd1498Szrj   return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
77*38fd1498Szrj }
78*38fd1498Szrj 
79*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpge(__m64 __A,__m64 __B)80*38fd1498Szrj _m_pfcmpge (__m64 __A, __m64 __B)
81*38fd1498Szrj {
82*38fd1498Szrj   return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
83*38fd1498Szrj }
84*38fd1498Szrj 
85*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpgt(__m64 __A,__m64 __B)86*38fd1498Szrj _m_pfcmpgt (__m64 __A, __m64 __B)
87*38fd1498Szrj {
88*38fd1498Szrj   return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
89*38fd1498Szrj }
90*38fd1498Szrj 
91*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmax(__m64 __A,__m64 __B)92*38fd1498Szrj _m_pfmax (__m64 __A, __m64 __B)
93*38fd1498Szrj {
94*38fd1498Szrj   return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
95*38fd1498Szrj }
96*38fd1498Szrj 
97*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmin(__m64 __A,__m64 __B)98*38fd1498Szrj _m_pfmin (__m64 __A, __m64 __B)
99*38fd1498Szrj {
100*38fd1498Szrj   return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
101*38fd1498Szrj }
102*38fd1498Szrj 
103*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmul(__m64 __A,__m64 __B)104*38fd1498Szrj _m_pfmul (__m64 __A, __m64 __B)
105*38fd1498Szrj {
106*38fd1498Szrj   return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
107*38fd1498Szrj }
108*38fd1498Szrj 
109*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcp(__m64 __A)110*38fd1498Szrj _m_pfrcp (__m64 __A)
111*38fd1498Szrj {
112*38fd1498Szrj   return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
113*38fd1498Szrj }
114*38fd1498Szrj 
115*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit1(__m64 __A,__m64 __B)116*38fd1498Szrj _m_pfrcpit1 (__m64 __A, __m64 __B)
117*38fd1498Szrj {
118*38fd1498Szrj   return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
119*38fd1498Szrj }
120*38fd1498Szrj 
121*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit2(__m64 __A,__m64 __B)122*38fd1498Szrj _m_pfrcpit2 (__m64 __A, __m64 __B)
123*38fd1498Szrj {
124*38fd1498Szrj   return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
125*38fd1498Szrj }
126*38fd1498Szrj 
127*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqrt(__m64 __A)128*38fd1498Szrj _m_pfrsqrt (__m64 __A)
129*38fd1498Szrj {
130*38fd1498Szrj   return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
131*38fd1498Szrj }
132*38fd1498Szrj 
133*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqit1(__m64 __A,__m64 __B)134*38fd1498Szrj _m_pfrsqit1 (__m64 __A, __m64 __B)
135*38fd1498Szrj {
136*38fd1498Szrj   return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
137*38fd1498Szrj }
138*38fd1498Szrj 
139*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsub(__m64 __A,__m64 __B)140*38fd1498Szrj _m_pfsub (__m64 __A, __m64 __B)
141*38fd1498Szrj {
142*38fd1498Szrj   return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
143*38fd1498Szrj }
144*38fd1498Szrj 
145*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsubr(__m64 __A,__m64 __B)146*38fd1498Szrj _m_pfsubr (__m64 __A, __m64 __B)
147*38fd1498Szrj {
148*38fd1498Szrj   return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
149*38fd1498Szrj }
150*38fd1498Szrj 
151*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fd(__m64 __A)152*38fd1498Szrj _m_pi2fd (__m64 __A)
153*38fd1498Szrj {
154*38fd1498Szrj   return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
155*38fd1498Szrj }
156*38fd1498Szrj 
157*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhrw(__m64 __A,__m64 __B)158*38fd1498Szrj _m_pmulhrw (__m64 __A, __m64 __B)
159*38fd1498Szrj {
160*38fd1498Szrj   return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
161*38fd1498Szrj }
162*38fd1498Szrj 
/* Request a prefetch of the memory at __P through the generic
   __builtin_prefetch.  The second argument (0) asks for a prefetch for
   reading and the third (3) for the highest temporal locality, which this
   header equates with _MM_HINT_T0.  Nothing is read from or written
   through __P by this function itself, and nothing is returned.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetch (void *__P)
{
  __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
}

/* Build an __m64 from a single float: the value is a two-element __v2sf
   vector whose element 0 is __A and whose element 1 is 0.0f, reinterpreted
   as __m64.  __extension__ silences pedantic diagnostics for the vector
   compound literal.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_float (float __A)
{
  return __extension__ (__m64)(__v2sf){ __A, 0.0f };
}

/* Extract a single float from an __m64: __A is reinterpreted as a __v2sf
   vector and its element 0 is returned.  The element is read through a
   union that overlays the vector with a float[2], rather than through a
   pointer cast.  */
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_float (__m64 __A)
{
  union { __v2sf v; float a[2]; } __tmp;
  __tmp.v = (__v2sf)__A;
  return __tmp.a[0];
}

/* If this header pushed target options for the 3DNow! definitions above
   (signalled by __DISABLE_3dNOW__), restore the previous options and drop
   the marker macro.  */
#ifdef __DISABLE_3dNOW__
#undef __DISABLE_3dNOW__
#pragma GCC pop_options
#endif /* __DISABLE_3dNOW__ */

/* When the 3DNow!A extension is not already enabled (or, on x86-64, when
   SSE is not), save the current target options and switch the needed ISA
   options on for the definitions that follow.  __DISABLE_3dNOW_A__ records
   that the options were pushed so that they can be popped again later.  */
#if (defined __x86_64__ && !defined __SSE__) || !defined __3dNOW_A__
#pragma GCC push_options
#ifdef __x86_64__
#pragma GCC target("sse,3dnowa")
#else
#pragma GCC target("3dnowa")
#endif
#define __DISABLE_3dNOW_A__
#endif /* __3dNOW_A__ */

198*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2iw(__m64 __A)199*38fd1498Szrj _m_pf2iw (__m64 __A)
200*38fd1498Szrj {
201*38fd1498Szrj   return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
202*38fd1498Szrj }
203*38fd1498Szrj 
204*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfnacc(__m64 __A,__m64 __B)205*38fd1498Szrj _m_pfnacc (__m64 __A, __m64 __B)
206*38fd1498Szrj {
207*38fd1498Szrj   return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
208*38fd1498Szrj }
209*38fd1498Szrj 
210*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfpnacc(__m64 __A,__m64 __B)211*38fd1498Szrj _m_pfpnacc (__m64 __A, __m64 __B)
212*38fd1498Szrj {
213*38fd1498Szrj   return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
214*38fd1498Szrj }
215*38fd1498Szrj 
216*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fw(__m64 __A)217*38fd1498Szrj _m_pi2fw (__m64 __A)
218*38fd1498Szrj {
219*38fd1498Szrj   return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
220*38fd1498Szrj }
221*38fd1498Szrj 
222*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pswapd(__m64 __A)223*38fd1498Szrj _m_pswapd (__m64 __A)
224*38fd1498Szrj {
225*38fd1498Szrj   return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
226*38fd1498Szrj }
227*38fd1498Szrj 
228*38fd1498Szrj #ifdef __DISABLE_3dNOW_A__
229*38fd1498Szrj #undef __DISABLE_3dNOW_A__
230*38fd1498Szrj #pragma GCC pop_options
231*38fd1498Szrj #endif /* __DISABLE_3dNOW_A__ */
232*38fd1498Szrj 
233*38fd1498Szrj #endif /* _MM3DNOW_H_INCLUDED */
234