/* xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/ia32intrin.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523)  */
/* Copyright (C) 2009-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Reject direct inclusion: compilation stops with an error unless
   _X86INTRIN_H_INCLUDED is already defined (presumably by <x86intrin.h>,
   which the message tells users to include instead).  */
#ifndef _X86INTRIN_H_INCLUDED
# error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
#endif

/* 32bit bsf: return the zero-based index of the least significant set
   bit of __X, computed as its number of trailing zero bits.  The result
   is undefined when __X is 0 (see __builtin_ctz).  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsfd (int __X)
{
  return __builtin_ctz (__X);
}

/* 32bit bsr: bit scan reverse of __X through the x86 BSR builtin, i.e.
   the zero-based index of the most significant set bit.
   NOTE(review): the result for __X == 0 is presumably unspecified, as
   for the BSR instruction itself -- confirm before relying on it.  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsrd (int __X)
{
  return __builtin_ia32_bsrsi (__X);
}

/* 32bit bswap: return __X with the order of its four bytes reversed.  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bswapd (int __X)
{
  return __builtin_bswap32 (__X);
}

/* The CRC32 helpers are not provided when __iamcu__ is defined.  */
#ifndef __iamcu__

/* When SSE4.2 is not already enabled, enable it for the definitions
   below only; __DISABLE_SSE4_2__ records that the saved options must
   be restored afterwards.  */
#ifndef __SSE4_2__
#pragma GCC push_options
#pragma GCC target("sse4.2")
#define __DISABLE_SSE4_2__
#endif /* __SSE4_2__ */

/* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value.
   Fold the 8-bit value __V into the running CRC __C and return the
   updated CRC.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32b (unsigned int __C, unsigned char __V)
{
  return __builtin_ia32_crc32qi (__C, __V);
}

/* Fold the 16-bit value __V into the running CRC __C and return the
   updated CRC.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32w (unsigned int __C, unsigned short __V)
{
  return __builtin_ia32_crc32hi (__C, __V);
}

/* Fold the 32-bit value __V into the running CRC __C and return the
   updated CRC.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32d (unsigned int __C, unsigned int __V)
{
  return __builtin_ia32_crc32si (__C, __V);
}

/* Restore the caller's target options if they were changed above.  */
#ifdef __DISABLE_SSE4_2__
#undef __DISABLE_SSE4_2__
#pragma GCC pop_options
#endif /* __DISABLE_SSE4_2__ */

#endif /* __iamcu__ */

/* 32bit popcnt: return the number of bits set to 1 in __X.  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__popcntd (unsigned int __X)
{
  return __builtin_popcount (__X);
}

#ifndef __iamcu__

/* rdpmc: return the 64-bit value produced by the RDPMC builtin for
   counter selector __S.  Not provided when __iamcu__ is defined.
   NOTE(review): RDPMC may fault in user mode depending on OS
   configuration -- confirm before calling from unprivileged code.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdpmc (int __S)
{
  return __builtin_ia32_rdpmc (__S);
}

#endif /* __iamcu__ */

/* rdtsc: return the 64-bit value produced by the RDTSC builtin (the
   processor's time-stamp counter).  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdtsc (void)
{
  return __builtin_ia32_rdtsc ();
}

#ifndef __iamcu__

/* rdtscp: return the 64-bit value produced by the RDTSCP builtin; __A
   is handed to the builtin unchanged.  Not provided when __iamcu__ is
   defined.
   NOTE(review): presumably the builtin stores the auxiliary
   (IA32_TSC_AUX) value through __A, so __A must point to writable
   storage -- confirm against the builtin's documentation.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdtscp (unsigned int *__A)
{
  return __builtin_ia32_rdtscp (__A);
}

#endif /* __iamcu__ */

/* 8bit rol: rotate the 8-bit value __X left by __C bit positions using
   the x86 rotate builtin and return the result.  */
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolb (unsigned char __X, int __C)
{
  return __builtin_ia32_rolqi (__X, __C);
}

/* 16bit rol: rotate the 16-bit value __X left by __C bit positions
   using the x86 rotate builtin and return the result.  */
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolw (unsigned short __X, int __C)
{
  return __builtin_ia32_rolhi (__X, __C);
}

/* 32bit rol: rotate __X left by __C bit positions.
   The count is first reduced modulo 32, so any int value is accepted.
   The complementary right shift uses (-__C & 31) instead of (32 - __C)
   so that a zero count never produces a shift by 32, which would be
   undefined in C.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rold (unsigned int __X, int __C)
{
  __C &= 31;
  return (__X << __C) | (__X >> (-__C & 31));
}

/* 8bit ror: rotate the 8-bit value __X right by __C bit positions
   using the x86 rotate builtin and return the result.  */
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorb (unsigned char __X, int __C)
{
  return __builtin_ia32_rorqi (__X, __C);
}

/* 16bit ror: rotate the 16-bit value __X right by __C bit positions
   using the x86 rotate builtin and return the result.  */
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorw (unsigned short __X, int __C)
{
  return __builtin_ia32_rorhi (__X, __C);
}

/* 32bit ror: rotate __X right by __C bit positions.
   The count is first reduced modulo 32, so any int value is accepted.
   The complementary left shift uses (-__C & 31) instead of (32 - __C)
   so that a zero count never produces a shift by 32, which would be
   undefined in C.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rord (unsigned int __X, int __C)
{
  __C &= 31;
  return (__X >> __C) | (__X << (-__C & 31));
}

/* Pause: invoke the x86 PAUSE builtin.  Takes no arguments and returns
   nothing.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__pause (void)
{
  __builtin_ia32_pause ();
}

/* 64-bit variants are available only when targeting x86-64; other
   targets get just the 32-bit flags accessors from the #else branch.  */
#ifdef __x86_64__
/* 64bit bsf: return the zero-based index of the least significant set
   bit of __X, computed as its number of trailing zero bits.  The result
   is undefined when __X is 0 (see __builtin_ctzll).  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsfq (long long __X)
{
  return __builtin_ctzll (__X);
}

/* 64bit bsr: bit scan reverse of __X through the x86 BSR builtin, i.e.
   the zero-based index of the most significant set bit.
   NOTE(review): the result for __X == 0 is presumably unspecified, as
   for the BSR instruction itself -- confirm before relying on it.  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsrq (long long __X)
{
  return __builtin_ia32_bsrdi (__X);
}

/* 64bit bswap: return __X with the order of its eight bytes reversed.  */
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bswapq (long long __X)
{
  return __builtin_bswap64 (__X);
}

/* When SSE4.2 is not already enabled, enable it for __crc32q only;
   __DISABLE_SSE4_2__ records that the saved options must be restored
   afterwards.  */
#ifndef __SSE4_2__
#pragma GCC push_options
#pragma GCC target("sse4.2")
#define __DISABLE_SSE4_2__
#endif /* __SSE4_2__ */

/* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value.
   Fold the 64-bit value __V into the running CRC __C and return the
   updated CRC.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32q (unsigned long long __C, unsigned long long __V)
{
  return __builtin_ia32_crc32di (__C, __V);
}

/* Restore the caller's target options if they were changed above.  */
#ifdef __DISABLE_SSE4_2__
#undef __DISABLE_SSE4_2__
#pragma GCC pop_options
#endif /* __DISABLE_SSE4_2__ */

/* 64bit popcnt: return the number of bits set to 1 in __X.  */
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__popcntq (unsigned long long __X)
{
  return __builtin_popcountll (__X);
}

/* 64bit rol: rotate __X left by __C bit positions.  The count is first
   reduced modulo 64; the complementary shift uses (-__C & 63) so that a
   zero count never produces a shift by 64, which would be undefined
   in C.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolq (unsigned long long __X, int __C)
{
  __C &= 63;
  return (__X << __C) | (__X >> (-__C & 63));
}

/* 64bit ror: rotate __X right by __C bit positions.  The count is first
   reduced modulo 64; the complementary shift uses (-__C & 63) so that a
   zero count never produces a shift by 64, which would be undefined
   in C.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorq (unsigned long long __X, int __C)
{
  __C &= 63;
  return (__X >> __C) | (__X << (-__C & 63));
}

/* Read flags register: return the value produced by the 64-bit
   read-flags builtin.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags (void)
{
  return __builtin_ia32_readeflags_u64 ();
}

/* Write flags register: pass __X to the 64-bit write-flags builtin.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags (unsigned long long __X)
{
  __builtin_ia32_writeeflags_u64 (__X);
}

/* Single-underscore aliases for the 64-bit functions above.  */
#define _bswap64(a)		__bswapq(a)
#define _popcnt64(a)		__popcntq(a)
#else

/* Read flags register: return the value produced by the 32-bit
   read-flags builtin.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags (void)
{
  return __builtin_ia32_readeflags_u32 ();
}

/* Write flags register: pass __X to the 32-bit write-flags builtin.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags (unsigned int __X)
{
  __builtin_ia32_writeeflags_u32 (__X);
}

#endif

/* On LP64 systems, longs are 64-bit.  Use the appropriate rotate
 * function: _lrotl/_lrotr expand to the 64-bit rotates when __LP64__
 * is defined and to the 32-bit rotates otherwise.  */
#ifdef __LP64__
#define _lrotl(a,b)		__rolq((a), (b))
#define _lrotr(a,b)		__rorq((a), (b))
#else
#define _lrotl(a,b)		__rold((a), (b))
#define _lrotr(a,b)		__rord((a), (b))
#endif

/* Single-underscore aliases for the double-underscore functions defined
   in this header.  _rdpmc and _rdtscp are omitted when __iamcu__ is
   defined, matching the functions they expand to.  */
#define _bit_scan_forward(a)	__bsfd(a)
#define _bit_scan_reverse(a)	__bsrd(a)
#define _bswap(a)		__bswapd(a)
#define _popcnt32(a)		__popcntd(a)
#ifndef __iamcu__
#define _rdpmc(a)		__rdpmc(a)
#define _rdtscp(a)		__rdtscp(a)
#endif /* __iamcu__ */
#define _rdtsc()		__rdtsc()
#define _rotwl(a,b)		__rolw((a), (b))
#define _rotwr(a,b)		__rorw((a), (b))
#define _rotl(a,b)		__rold((a), (b))
#define _rotr(a,b)		__rord((a), (b))