xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/avx512pfintrin.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
2*38fd1498Szrj 
3*38fd1498Szrj    This file is part of GCC.
4*38fd1498Szrj 
5*38fd1498Szrj    GCC is free software; you can redistribute it and/or modify
6*38fd1498Szrj    it under the terms of the GNU General Public License as published by
7*38fd1498Szrj    the Free Software Foundation; either version 3, or (at your option)
8*38fd1498Szrj    any later version.
9*38fd1498Szrj 
10*38fd1498Szrj    GCC is distributed in the hope that it will be useful,
11*38fd1498Szrj    but WITHOUT ANY WARRANTY; without even the implied warranty of
12*38fd1498Szrj    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13*38fd1498Szrj    GNU General Public License for more details.
14*38fd1498Szrj 
15*38fd1498Szrj    Under Section 7 of GPL version 3, you are granted additional
16*38fd1498Szrj    permissions described in the GCC Runtime Library Exception, version
17*38fd1498Szrj    3.1, as published by the Free Software Foundation.
18*38fd1498Szrj 
19*38fd1498Szrj    You should have received a copy of the GNU General Public License and
20*38fd1498Szrj    a copy of the GCC Runtime Library Exception along with this program;
21*38fd1498Szrj    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22*38fd1498Szrj    <http://www.gnu.org/licenses/>.  */
23*38fd1498Szrj 
24*38fd1498Szrj #ifndef _IMMINTRIN_H_INCLUDED
25*38fd1498Szrj #error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
26*38fd1498Szrj #endif
27*38fd1498Szrj 
28*38fd1498Szrj #ifndef _AVX512PFINTRIN_H_INCLUDED
29*38fd1498Szrj #define _AVX512PFINTRIN_H_INCLUDED
30*38fd1498Szrj 
31*38fd1498Szrj #ifndef __AVX512PF__
32*38fd1498Szrj #pragma GCC push_options
33*38fd1498Szrj #pragma GCC target("avx512pf")
34*38fd1498Szrj #define __DISABLE_AVX512PF__
35*38fd1498Szrj #endif /* __AVX512PF__ */
36*38fd1498Szrj 
/* Integer types used for the mask argument of the builtins below.  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;

/* 64-byte vector types the index arguments are cast to before they are
   handed to the builtins: eight long long or sixteen int elements.  */
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));

/* Public 64-byte integer vector type.  It carries __may_alias__ because
   the Intel API is flexible enough that aliasing with other vector types,
   and with their scalar components, must be allowed.  */
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
47*38fd1498Szrj 
48*38fd1498Szrj #ifdef __OPTIMIZE__
49*38fd1498Szrj extern __inline void
50*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32gather_pd(__m256i __index,void const * __addr,int __scale,int __hint)51*38fd1498Szrj _mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
52*38fd1498Szrj 			      int __scale, int __hint)
53*38fd1498Szrj {
54*38fd1498Szrj   __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
55*38fd1498Szrj 			      __scale, __hint);
56*38fd1498Szrj }
57*38fd1498Szrj 
58*38fd1498Szrj extern __inline void
59*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32gather_ps(__m512i __index,void const * __addr,int __scale,int __hint)60*38fd1498Szrj _mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
61*38fd1498Szrj 			      int __scale, int __hint)
62*38fd1498Szrj {
63*38fd1498Szrj   __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
64*38fd1498Szrj 			      __scale, __hint);
65*38fd1498Szrj }
66*38fd1498Szrj 
67*38fd1498Szrj extern __inline void
68*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32gather_pd(__m256i __index,__mmask8 __mask,void const * __addr,int __scale,int __hint)69*38fd1498Szrj _mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
70*38fd1498Szrj 				   void const *__addr, int __scale, int __hint)
71*38fd1498Szrj {
72*38fd1498Szrj   __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
73*38fd1498Szrj 			      __hint);
74*38fd1498Szrj }
75*38fd1498Szrj 
76*38fd1498Szrj extern __inline void
77*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32gather_ps(__m512i __index,__mmask16 __mask,void const * __addr,int __scale,int __hint)78*38fd1498Szrj _mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
79*38fd1498Szrj 				   void const *__addr, int __scale, int __hint)
80*38fd1498Szrj {
81*38fd1498Szrj   __builtin_ia32_gatherpfdps (__mask, (__v16si) __index, __addr, __scale,
82*38fd1498Szrj 			      __hint);
83*38fd1498Szrj }
84*38fd1498Szrj 
85*38fd1498Szrj extern __inline void
86*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64gather_pd(__m512i __index,void const * __addr,int __scale,int __hint)87*38fd1498Szrj _mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
88*38fd1498Szrj 			      int __scale, int __hint)
89*38fd1498Szrj {
90*38fd1498Szrj   __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
91*38fd1498Szrj 			      __scale, __hint);
92*38fd1498Szrj }
93*38fd1498Szrj 
94*38fd1498Szrj extern __inline void
95*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64gather_ps(__m512i __index,void const * __addr,int __scale,int __hint)96*38fd1498Szrj _mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
97*38fd1498Szrj 			      int __scale, int __hint)
98*38fd1498Szrj {
99*38fd1498Szrj   __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
100*38fd1498Szrj 			      __scale, __hint);
101*38fd1498Szrj }
102*38fd1498Szrj 
103*38fd1498Szrj extern __inline void
104*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64gather_pd(__m512i __index,__mmask8 __mask,void const * __addr,int __scale,int __hint)105*38fd1498Szrj _mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
106*38fd1498Szrj 				   void const *__addr, int __scale, int __hint)
107*38fd1498Szrj {
108*38fd1498Szrj   __builtin_ia32_gatherpfqpd (__mask, (__v8di) __index, __addr, __scale,
109*38fd1498Szrj 			      __hint);
110*38fd1498Szrj }
111*38fd1498Szrj 
112*38fd1498Szrj extern __inline void
113*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64gather_ps(__m512i __index,__mmask8 __mask,void const * __addr,int __scale,int __hint)114*38fd1498Szrj _mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
115*38fd1498Szrj 				   void const *__addr, int __scale, int __hint)
116*38fd1498Szrj {
117*38fd1498Szrj   __builtin_ia32_gatherpfqps (__mask, (__v8di) __index, __addr, __scale,
118*38fd1498Szrj 			      __hint);
119*38fd1498Szrj }
120*38fd1498Szrj 
121*38fd1498Szrj extern __inline void
122*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32scatter_pd(void * __addr,__m256i __index,int __scale,int __hint)123*38fd1498Szrj _mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
124*38fd1498Szrj 			       int __hint)
125*38fd1498Szrj {
126*38fd1498Szrj   __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
127*38fd1498Szrj 			      __scale, __hint);
128*38fd1498Szrj }
129*38fd1498Szrj 
130*38fd1498Szrj extern __inline void
131*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32scatter_ps(void * __addr,__m512i __index,int __scale,int __hint)132*38fd1498Szrj _mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
133*38fd1498Szrj 			       int __hint)
134*38fd1498Szrj {
135*38fd1498Szrj   __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
136*38fd1498Szrj 			      __scale, __hint);
137*38fd1498Szrj }
138*38fd1498Szrj 
139*38fd1498Szrj extern __inline void
140*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32scatter_pd(void * __addr,__mmask8 __mask,__m256i __index,int __scale,int __hint)141*38fd1498Szrj _mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
142*38fd1498Szrj 				    __m256i __index, int __scale, int __hint)
143*38fd1498Szrj {
144*38fd1498Szrj   __builtin_ia32_scatterpfdpd (__mask, (__v8si) __index, __addr, __scale,
145*38fd1498Szrj 			       __hint);
146*38fd1498Szrj }
147*38fd1498Szrj 
148*38fd1498Szrj extern __inline void
149*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32scatter_ps(void * __addr,__mmask16 __mask,__m512i __index,int __scale,int __hint)150*38fd1498Szrj _mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
151*38fd1498Szrj 				    __m512i __index, int __scale, int __hint)
152*38fd1498Szrj {
153*38fd1498Szrj   __builtin_ia32_scatterpfdps (__mask, (__v16si) __index, __addr, __scale,
154*38fd1498Szrj 			       __hint);
155*38fd1498Szrj }
156*38fd1498Szrj 
157*38fd1498Szrj extern __inline void
158*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64scatter_pd(void * __addr,__m512i __index,int __scale,int __hint)159*38fd1498Szrj _mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
160*38fd1498Szrj 			       int __hint)
161*38fd1498Szrj {
162*38fd1498Szrj   __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,__addr,
163*38fd1498Szrj 			      __scale, __hint);
164*38fd1498Szrj }
165*38fd1498Szrj 
166*38fd1498Szrj extern __inline void
167*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64scatter_ps(void * __addr,__m512i __index,int __scale,int __hint)168*38fd1498Szrj _mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
169*38fd1498Szrj 			       int __hint)
170*38fd1498Szrj {
171*38fd1498Szrj   __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
172*38fd1498Szrj 			      __scale, __hint);
173*38fd1498Szrj }
174*38fd1498Szrj 
175*38fd1498Szrj extern __inline void
176*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64scatter_pd(void * __addr,__mmask16 __mask,__m512i __index,int __scale,int __hint)177*38fd1498Szrj _mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask16 __mask,
178*38fd1498Szrj 				    __m512i __index, int __scale, int __hint)
179*38fd1498Szrj {
180*38fd1498Szrj   __builtin_ia32_scatterpfqpd (__mask, (__v8di) __index, __addr, __scale,
181*38fd1498Szrj 			      __hint);
182*38fd1498Szrj }
183*38fd1498Szrj 
184*38fd1498Szrj extern __inline void
185*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64scatter_ps(void * __addr,__mmask16 __mask,__m512i __index,int __scale,int __hint)186*38fd1498Szrj _mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask16 __mask,
187*38fd1498Szrj 				    __m512i __index, int __scale, int __hint)
188*38fd1498Szrj {
189*38fd1498Szrj   __builtin_ia32_scatterpfqps (__mask, (__v8di) __index, __addr, __scale,
190*38fd1498Szrj 			       __hint);
191*38fd1498Szrj }
192*38fd1498Szrj 
193*38fd1498Szrj #else
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherpfdpd with an all-ones 8-bit mask.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT)               \
  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si)(__m256i) (INDEX),    \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
197*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherpfdps with an all-ones 16-bit mask.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT)               \
  __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF,                            \
                              (__v16si)(__m512i) (INDEX),                    \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
201*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherpfdpd with MASK cast to __mmask8.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
  __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX),  \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
205*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherpfdps with MASK cast to __mmask16.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
  __builtin_ia32_gatherpfdps ((__mmask16) (MASK),                            \
                              (__v16si)(__m512i) (INDEX),                    \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
209*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherpfqpd with an all-ones 8-bit mask.  Every argument
   is parenthesized so the casts apply to the whole argument expression;
   ADDR is cast to void const *, matching the inline function's parameter
   type and the i32 gather macros.  */
#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT)               \
  __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX),    \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
213*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherpfqps with an all-ones 8-bit mask.  Every argument
   is parenthesized so the casts apply to the whole argument expression;
   ADDR is cast to void const *, matching the inline function's parameter
   type and the i32 gather macros.  */
#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT)               \
  __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX),    \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
217*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherpfqpd with MASK cast to __mmask8.  Every argument
   is parenthesized so the casts apply to the whole argument expression;
   ADDR is cast to void const *, matching the inline function's parameter
   type and the i32 gather macros.  */
#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
  __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
221*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_gatherpfqps with MASK cast to __mmask8.  Every argument
   is parenthesized so the casts apply to the whole argument expression;
   ADDR is cast to void const *, matching the inline function's parameter
   type and the i32 gather macros.  */
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
  __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
225*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_scatterpfdpd with an all-ones 8-bit mask.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT)              \
  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si)(__m256i) (INDEX),   \
                               (void *) (ADDR), (int) (SCALE),               \
                               (int) (HINT))
229*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_scatterpfdps with an all-ones 16-bit mask.  Every
   argument is parenthesized so the casts apply to the whole argument
   expression.  */
#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT)              \
  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF,                           \
                               (__v16si)(__m512i) (INDEX),                   \
                               (void *) (ADDR), (int) (SCALE),               \
                               (int) (HINT))
233*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_scatterpfdpd with MASK cast to __mmask8.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
  __builtin_ia32_scatterpfdpd ((__mmask8) (MASK),                            \
                               (__v8si)(__m256i) (INDEX),                    \
                               (void *) (ADDR), (int) (SCALE),               \
                               (int) (HINT))
237*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_scatterpfdps with MASK cast to __mmask16.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
  __builtin_ia32_scatterpfdps ((__mmask16) (MASK),                           \
                               (__v16si)(__m512i) (INDEX),                   \
                               (void *) (ADDR), (int) (SCALE),               \
                               (int) (HINT))
241*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_scatterpfqpd with an all-ones 8-bit mask.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT)              \
  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX),   \
                               (void *) (ADDR), (int) (SCALE),               \
                               (int) (HINT))
245*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_scatterpfqps with an all-ones 8-bit mask.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT)              \
  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX),   \
                               (void *) (ADDR), (int) (SCALE),               \
                               (int) (HINT))
249*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_scatterpfqpd with MASK cast to __mmask8.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
  __builtin_ia32_scatterpfqpd ((__mmask8) (MASK),                            \
                               (__v8di)(__m512i) (INDEX),                    \
                               (void *) (ADDR), (int) (SCALE),               \
                               (int) (HINT))
253*38fd1498Szrj 
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_scatterpfqps with MASK cast to __mmask8.  Every argument
   is parenthesized so the casts apply to the whole argument expression.  */
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
  __builtin_ia32_scatterpfqps ((__mmask8) (MASK),                            \
                               (__v8di)(__m512i) (INDEX),                    \
                               (void *) (ADDR), (int) (SCALE),               \
                               (int) (HINT))
257*38fd1498Szrj #endif
258*38fd1498Szrj 
259*38fd1498Szrj #ifdef __DISABLE_AVX512PF__
260*38fd1498Szrj #undef __DISABLE_AVX512PF__
261*38fd1498Szrj #pragma GCC pop_options
262*38fd1498Szrj #endif /* __DISABLE_AVX512PF__ */
263*38fd1498Szrj 
264*38fd1498Szrj #endif /* _AVX512PFINTRIN_H_INCLUDED */
265