/* Copyright (C) 2013-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
23*38fd1498Szrj
24*38fd1498Szrj #ifndef _IMMINTRIN_H_INCLUDED
25*38fd1498Szrj #error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
26*38fd1498Szrj #endif
27*38fd1498Szrj
28*38fd1498Szrj #ifndef _AVX512PFINTRIN_H_INCLUDED
29*38fd1498Szrj #define _AVX512PFINTRIN_H_INCLUDED
30*38fd1498Szrj
31*38fd1498Szrj #ifndef __AVX512PF__
32*38fd1498Szrj #pragma GCC push_options
33*38fd1498Szrj #pragma GCC target("avx512pf")
34*38fd1498Szrj #define __DISABLE_AVX512PF__
35*38fd1498Szrj #endif /* __AVX512PF__ */
36*38fd1498Szrj
/* Internal data types for implementing the intrinsics: 64-byte vectors
   of eight long long and of sixteen int elements.  */
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));

/* Lane masks used by the intrinsics below.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
48*38fd1498Szrj #ifdef __OPTIMIZE__
49*38fd1498Szrj extern __inline void
50*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32gather_pd(__m256i __index,void const * __addr,int __scale,int __hint)51*38fd1498Szrj _mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
52*38fd1498Szrj int __scale, int __hint)
53*38fd1498Szrj {
54*38fd1498Szrj __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
55*38fd1498Szrj __scale, __hint);
56*38fd1498Szrj }
57*38fd1498Szrj
58*38fd1498Szrj extern __inline void
59*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32gather_ps(__m512i __index,void const * __addr,int __scale,int __hint)60*38fd1498Szrj _mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
61*38fd1498Szrj int __scale, int __hint)
62*38fd1498Szrj {
63*38fd1498Szrj __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
64*38fd1498Szrj __scale, __hint);
65*38fd1498Szrj }
66*38fd1498Szrj
67*38fd1498Szrj extern __inline void
68*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32gather_pd(__m256i __index,__mmask8 __mask,void const * __addr,int __scale,int __hint)69*38fd1498Szrj _mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
70*38fd1498Szrj void const *__addr, int __scale, int __hint)
71*38fd1498Szrj {
72*38fd1498Szrj __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
73*38fd1498Szrj __hint);
74*38fd1498Szrj }
75*38fd1498Szrj
76*38fd1498Szrj extern __inline void
77*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32gather_ps(__m512i __index,__mmask16 __mask,void const * __addr,int __scale,int __hint)78*38fd1498Szrj _mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
79*38fd1498Szrj void const *__addr, int __scale, int __hint)
80*38fd1498Szrj {
81*38fd1498Szrj __builtin_ia32_gatherpfdps (__mask, (__v16si) __index, __addr, __scale,
82*38fd1498Szrj __hint);
83*38fd1498Szrj }
84*38fd1498Szrj
85*38fd1498Szrj extern __inline void
86*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64gather_pd(__m512i __index,void const * __addr,int __scale,int __hint)87*38fd1498Szrj _mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
88*38fd1498Szrj int __scale, int __hint)
89*38fd1498Szrj {
90*38fd1498Szrj __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
91*38fd1498Szrj __scale, __hint);
92*38fd1498Szrj }
93*38fd1498Szrj
94*38fd1498Szrj extern __inline void
95*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64gather_ps(__m512i __index,void const * __addr,int __scale,int __hint)96*38fd1498Szrj _mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
97*38fd1498Szrj int __scale, int __hint)
98*38fd1498Szrj {
99*38fd1498Szrj __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
100*38fd1498Szrj __scale, __hint);
101*38fd1498Szrj }
102*38fd1498Szrj
103*38fd1498Szrj extern __inline void
104*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64gather_pd(__m512i __index,__mmask8 __mask,void const * __addr,int __scale,int __hint)105*38fd1498Szrj _mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
106*38fd1498Szrj void const *__addr, int __scale, int __hint)
107*38fd1498Szrj {
108*38fd1498Szrj __builtin_ia32_gatherpfqpd (__mask, (__v8di) __index, __addr, __scale,
109*38fd1498Szrj __hint);
110*38fd1498Szrj }
111*38fd1498Szrj
112*38fd1498Szrj extern __inline void
113*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64gather_ps(__m512i __index,__mmask8 __mask,void const * __addr,int __scale,int __hint)114*38fd1498Szrj _mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
115*38fd1498Szrj void const *__addr, int __scale, int __hint)
116*38fd1498Szrj {
117*38fd1498Szrj __builtin_ia32_gatherpfqps (__mask, (__v8di) __index, __addr, __scale,
118*38fd1498Szrj __hint);
119*38fd1498Szrj }
120*38fd1498Szrj
121*38fd1498Szrj extern __inline void
122*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32scatter_pd(void * __addr,__m256i __index,int __scale,int __hint)123*38fd1498Szrj _mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
124*38fd1498Szrj int __hint)
125*38fd1498Szrj {
126*38fd1498Szrj __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
127*38fd1498Szrj __scale, __hint);
128*38fd1498Szrj }
129*38fd1498Szrj
130*38fd1498Szrj extern __inline void
131*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32scatter_ps(void * __addr,__m512i __index,int __scale,int __hint)132*38fd1498Szrj _mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
133*38fd1498Szrj int __hint)
134*38fd1498Szrj {
135*38fd1498Szrj __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
136*38fd1498Szrj __scale, __hint);
137*38fd1498Szrj }
138*38fd1498Szrj
139*38fd1498Szrj extern __inline void
140*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32scatter_pd(void * __addr,__mmask8 __mask,__m256i __index,int __scale,int __hint)141*38fd1498Szrj _mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
142*38fd1498Szrj __m256i __index, int __scale, int __hint)
143*38fd1498Szrj {
144*38fd1498Szrj __builtin_ia32_scatterpfdpd (__mask, (__v8si) __index, __addr, __scale,
145*38fd1498Szrj __hint);
146*38fd1498Szrj }
147*38fd1498Szrj
148*38fd1498Szrj extern __inline void
149*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32scatter_ps(void * __addr,__mmask16 __mask,__m512i __index,int __scale,int __hint)150*38fd1498Szrj _mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
151*38fd1498Szrj __m512i __index, int __scale, int __hint)
152*38fd1498Szrj {
153*38fd1498Szrj __builtin_ia32_scatterpfdps (__mask, (__v16si) __index, __addr, __scale,
154*38fd1498Szrj __hint);
155*38fd1498Szrj }
156*38fd1498Szrj
157*38fd1498Szrj extern __inline void
158*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64scatter_pd(void * __addr,__m512i __index,int __scale,int __hint)159*38fd1498Szrj _mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
160*38fd1498Szrj int __hint)
161*38fd1498Szrj {
162*38fd1498Szrj __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,__addr,
163*38fd1498Szrj __scale, __hint);
164*38fd1498Szrj }
165*38fd1498Szrj
166*38fd1498Szrj extern __inline void
167*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64scatter_ps(void * __addr,__m512i __index,int __scale,int __hint)168*38fd1498Szrj _mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
169*38fd1498Szrj int __hint)
170*38fd1498Szrj {
171*38fd1498Szrj __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
172*38fd1498Szrj __scale, __hint);
173*38fd1498Szrj }
174*38fd1498Szrj
175*38fd1498Szrj extern __inline void
176*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64scatter_pd(void * __addr,__mmask16 __mask,__m512i __index,int __scale,int __hint)177*38fd1498Szrj _mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask16 __mask,
178*38fd1498Szrj __m512i __index, int __scale, int __hint)
179*38fd1498Szrj {
180*38fd1498Szrj __builtin_ia32_scatterpfqpd (__mask, (__v8di) __index, __addr, __scale,
181*38fd1498Szrj __hint);
182*38fd1498Szrj }
183*38fd1498Szrj
184*38fd1498Szrj extern __inline void
185*38fd1498Szrj __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64scatter_ps(void * __addr,__mmask16 __mask,__m512i __index,int __scale,int __hint)186*38fd1498Szrj _mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask16 __mask,
187*38fd1498Szrj __m512i __index, int __scale, int __hint)
188*38fd1498Szrj {
189*38fd1498Szrj __builtin_ia32_scatterpfqps (__mask, (__v8di) __index, __addr, __scale,
190*38fd1498Szrj __hint);
191*38fd1498Szrj }
192*38fd1498Szrj
193*38fd1498Szrj #else
/* Macro form of the unmasked i32 gather prefetch (pd): all eight mask
   bits set.  Each argument is parenthesized before it is cast so that a
   compound expression such as `p + 1' is converted as a whole.  */
#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT)               \
  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si)(__m256i) (INDEX),    \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
197*38fd1498Szrj
/* Macro form of the unmasked i32 gather prefetch (ps): all sixteen mask
   bits set.  Each argument is parenthesized before it is cast so that a
   compound expression such as `p + 1' is converted as a whole.  */
#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT)               \
  __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si)(__m512i) (INDEX),\
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
201*38fd1498Szrj
/* Macro form of the masked i32 gather prefetch (pd); MASK is converted
   to __mmask8.  Each argument is parenthesized before it is cast so
   that a compound expression is converted as a whole.  */
#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
  __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX),  \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
205*38fd1498Szrj
/* Macro form of the masked i32 gather prefetch (ps); MASK is converted
   to __mmask16.  Each argument is parenthesized before it is cast so
   that a compound expression is converted as a whole.  */
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
  __builtin_ia32_gatherpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX),\
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
209*38fd1498Szrj
/* Macro form of the unmasked i64 gather prefetch (pd): all eight mask
   bits set.  ADDR is cast to `void const *', matching the inline form's
   parameter type, so a const-qualified pointer keeps its qualifier.
   Each argument is parenthesized before it is cast.  */
#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT)               \
  __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX),    \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
213*38fd1498Szrj
/* Macro form of the unmasked i64 gather prefetch (ps): all eight mask
   bits set.  ADDR is cast to `void const *', matching the inline form's
   parameter type, so a const-qualified pointer keeps its qualifier.
   Each argument is parenthesized before it is cast.  */
#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT)               \
  __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX),    \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
217*38fd1498Szrj
/* Macro form of the masked i64 gather prefetch (pd); MASK is converted
   to __mmask8.  ADDR is cast to `void const *', matching the inline
   form's parameter type.  Each argument is parenthesized before it is
   cast.  */
#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
  __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
221*38fd1498Szrj
/* Macro form of the masked i64 gather prefetch (ps); MASK is converted
   to __mmask8.  ADDR is cast to `void const *', matching the inline
   form's parameter type.  Each argument is parenthesized before it is
   cast.  */
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
  __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
                              (void const *) (ADDR), (int) (SCALE),          \
                              (int) (HINT))
225*38fd1498Szrj
/* Macro form of the unmasked i32 scatter prefetch (pd): all eight mask
   bits set.  Each argument is parenthesized before it is cast so that a
   compound expression such as `p + 1' is converted as a whole.  */
#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT)              \
  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si)(__m256i) (INDEX),   \
                               (void *) (ADDR), (int) (SCALE), (int) (HINT))
229*38fd1498Szrj
/* Macro form of the unmasked i32 scatter prefetch (ps): all sixteen
   mask bits set.  Each argument is parenthesized before it is cast so
   that a compound expression such as `p + 1' is converted as a whole.  */
#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT)              \
  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF,                           \
                               (__v16si)(__m512i) (INDEX),                   \
                               (void *) (ADDR), (int) (SCALE), (int) (HINT))
233*38fd1498Szrj
/* Macro form of the masked i32 scatter prefetch (pd); MASK is converted
   to __mmask8.  Each argument is parenthesized before it is cast so
   that a compound expression is converted as a whole.  */
#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
  __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), \
                               (void *) (ADDR), (int) (SCALE), (int) (HINT))
237*38fd1498Szrj
/* Macro form of the masked i32 scatter prefetch (ps); MASK is converted
   to __mmask16.  Each argument is parenthesized before it is cast so
   that a compound expression is converted as a whole.  */
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
  __builtin_ia32_scatterpfdps ((__mmask16) (MASK),                           \
                               (__v16si)(__m512i) (INDEX),                   \
                               (void *) (ADDR), (int) (SCALE), (int) (HINT))
241*38fd1498Szrj
/* Macro form of the unmasked i64 scatter prefetch (pd): all eight mask
   bits set.  Each argument is parenthesized before it is cast so that a
   compound expression such as `p + 1' is converted as a whole.  */
#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT)              \
  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX),   \
                               (void *) (ADDR), (int) (SCALE), (int) (HINT))
245*38fd1498Szrj
/* Macro form of the unmasked i64 scatter prefetch (ps): all eight mask
   bits set.  Each argument is parenthesized before it is cast so that a
   compound expression such as `p + 1' is converted as a whole.  */
#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT)              \
  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di)(__m512i) (INDEX),   \
                               (void *) (ADDR), (int) (SCALE), (int) (HINT))
249*38fd1498Szrj
/* Macro form of the masked i64 scatter prefetch (pd); MASK is converted
   to __mmask8.  Each argument is parenthesized before it is cast so
   that a compound expression is converted as a whole.  */
#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
  __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
                               (void *) (ADDR), (int) (SCALE), (int) (HINT))
253*38fd1498Szrj
/* Macro form of the masked i64 scatter prefetch (ps); MASK is converted
   to __mmask8.  Each argument is parenthesized before it is cast so
   that a compound expression is converted as a whole.  */
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
  __builtin_ia32_scatterpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
                               (void *) (ADDR), (int) (SCALE), (int) (HINT))
257*38fd1498Szrj #endif
258*38fd1498Szrj
259*38fd1498Szrj #ifdef __DISABLE_AVX512PF__
260*38fd1498Szrj #undef __DISABLE_AVX512PF__
261*38fd1498Szrj #pragma GCC pop_options
262*38fd1498Szrj #endif /* __DISABLE_AVX512PF__ */
263*38fd1498Szrj
264*38fd1498Szrj #endif /* _AVX512PFINTRIN_H_INCLUDED */
265