/* xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/avx512vnniintrin.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523) */
/* Copyright (C) 2013-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

24*38fd1498Szrj #ifndef _IMMINTRIN_H_INCLUDED
25*38fd1498Szrj #error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
26*38fd1498Szrj #endif
27*38fd1498Szrj 
28*38fd1498Szrj #ifndef __AVX512VNNIINTRIN_H_INCLUDED
29*38fd1498Szrj #define __AVX512VNNIINTRIN_H_INCLUDED
30*38fd1498Szrj 
31*38fd1498Szrj #if !defined(__AVX512VNNI__)
32*38fd1498Szrj #pragma GCC push_options
33*38fd1498Szrj #pragma GCC target("avx512vnni")
34*38fd1498Szrj #define __DISABLE_AVX512VNNI__
35*38fd1498Szrj #endif /* __AVX512VNNI__ */
36*38fd1498Szrj 
37*38fd1498Szrj extern __inline __m512i
38*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbusd_epi32(__m512i __A,__m512i __B,__m512i __C)39*38fd1498Szrj _mm512_dpbusd_epi32 (__m512i __A, __m512i __B, __m512i __C)
40*38fd1498Szrj {
41*38fd1498Szrj   return (__m512i) __builtin_ia32_vpdpbusd_v16si ((__v16si)__A, (__v16si) __B,
42*38fd1498Szrj 								(__v16si) __C);
43*38fd1498Szrj }
44*38fd1498Szrj 
45*38fd1498Szrj extern __inline __m512i
46*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpbusd_epi32(__m512i __A,__mmask16 __B,__m512i __C,__m512i __D)47*38fd1498Szrj _mm512_mask_dpbusd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
48*38fd1498Szrj {
49*38fd1498Szrj   return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask ((__v16si)__A,
50*38fd1498Szrj 				(__v16si) __C, (__v16si) __D, (__mmask16)__B);
51*38fd1498Szrj }
52*38fd1498Szrj 
53*38fd1498Szrj extern __inline __m512i
54*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpbusd_epi32(__mmask16 __A,__m512i __B,__m512i __C,__m512i __D)55*38fd1498Szrj _mm512_maskz_dpbusd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
56*38fd1498Szrj 							__m512i __D)
57*38fd1498Szrj {
58*38fd1498Szrj   return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz ((__v16si)__B,
59*38fd1498Szrj 				(__v16si) __C, (__v16si) __D, (__mmask16)__A);
60*38fd1498Szrj }
61*38fd1498Szrj 
62*38fd1498Szrj extern __inline __m512i
63*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbusds_epi32(__m512i __A,__m512i __B,__m512i __C)64*38fd1498Szrj _mm512_dpbusds_epi32 (__m512i __A, __m512i __B, __m512i __C)
65*38fd1498Szrj {
66*38fd1498Szrj   return (__m512i) __builtin_ia32_vpdpbusds_v16si ((__v16si)__A, (__v16si) __B,
67*38fd1498Szrj 							 (__v16si) __C);
68*38fd1498Szrj }
69*38fd1498Szrj 
70*38fd1498Szrj extern __inline __m512i
71*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpbusds_epi32(__m512i __A,__mmask16 __B,__m512i __C,__m512i __D)72*38fd1498Szrj _mm512_mask_dpbusds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
73*38fd1498Szrj 							__m512i __D)
74*38fd1498Szrj {
75*38fd1498Szrj   return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask ((__v16si)__A,
76*38fd1498Szrj 				(__v16si) __C, (__v16si) __D, (__mmask16)__B);
77*38fd1498Szrj }
78*38fd1498Szrj 
79*38fd1498Szrj extern __inline __m512i
80*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpbusds_epi32(__mmask16 __A,__m512i __B,__m512i __C,__m512i __D)81*38fd1498Szrj _mm512_maskz_dpbusds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
82*38fd1498Szrj 							__m512i __D)
83*38fd1498Szrj {
84*38fd1498Szrj   return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz ((__v16si)__B,
85*38fd1498Szrj 				(__v16si) __C, (__v16si) __D, (__mmask16)__A);
86*38fd1498Szrj }
87*38fd1498Szrj 
88*38fd1498Szrj extern __inline __m512i
89*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpwssd_epi32(__m512i __A,__m512i __B,__m512i __C)90*38fd1498Szrj _mm512_dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C)
91*38fd1498Szrj {
92*38fd1498Szrj   return (__m512i) __builtin_ia32_vpdpwssd_v16si ((__v16si)__A, (__v16si) __B,
93*38fd1498Szrj 								(__v16si) __C);
94*38fd1498Szrj }
95*38fd1498Szrj 
96*38fd1498Szrj extern __inline __m512i
97*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpwssd_epi32(__m512i __A,__mmask16 __B,__m512i __C,__m512i __D)98*38fd1498Szrj _mm512_mask_dpwssd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
99*38fd1498Szrj {
100*38fd1498Szrj   return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask ((__v16si)__A,
101*38fd1498Szrj 				(__v16si) __C, (__v16si) __D, (__mmask16)__B);
102*38fd1498Szrj }
103*38fd1498Szrj 
104*38fd1498Szrj extern __inline __m512i
105*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpwssd_epi32(__mmask16 __A,__m512i __B,__m512i __C,__m512i __D)106*38fd1498Szrj _mm512_maskz_dpwssd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
107*38fd1498Szrj 							__m512i __D)
108*38fd1498Szrj {
109*38fd1498Szrj   return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz ((__v16si)__B,
110*38fd1498Szrj 				(__v16si) __C, (__v16si) __D, (__mmask16)__A);
111*38fd1498Szrj }
112*38fd1498Szrj 
113*38fd1498Szrj extern __inline __m512i
114*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpwssds_epi32(__m512i __A,__m512i __B,__m512i __C)115*38fd1498Szrj _mm512_dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C)
116*38fd1498Szrj {
117*38fd1498Szrj   return (__m512i) __builtin_ia32_vpdpwssds_v16si ((__v16si)__A, (__v16si) __B,
118*38fd1498Szrj 								(__v16si) __C);
119*38fd1498Szrj }
120*38fd1498Szrj 
121*38fd1498Szrj extern __inline __m512i
122*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpwssds_epi32(__m512i __A,__mmask16 __B,__m512i __C,__m512i __D)123*38fd1498Szrj _mm512_mask_dpwssds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
124*38fd1498Szrj 							__m512i __D)
125*38fd1498Szrj {
126*38fd1498Szrj   return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask ((__v16si)__A,
127*38fd1498Szrj 				(__v16si) __C, (__v16si) __D, (__mmask16)__B);
128*38fd1498Szrj }
129*38fd1498Szrj 
130*38fd1498Szrj extern __inline __m512i
131*38fd1498Szrj __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpwssds_epi32(__mmask16 __A,__m512i __B,__m512i __C,__m512i __D)132*38fd1498Szrj _mm512_maskz_dpwssds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
133*38fd1498Szrj 							__m512i __D)
134*38fd1498Szrj {
135*38fd1498Szrj   return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz ((__v16si)__B,
136*38fd1498Szrj 				(__v16si) __C, (__v16si) __D, (__mmask16)__A);
137*38fd1498Szrj }
138*38fd1498Szrj 
139*38fd1498Szrj #ifdef __DISABLE_AVX512VNNI__
140*38fd1498Szrj #undef __DISABLE_AVX512VNNI__
141*38fd1498Szrj #pragma GCC pop_options
142*38fd1498Szrj #endif /* __DISABLE_AVX512VNNI__ */
143*38fd1498Szrj 
144*38fd1498Szrj #endif /* __AVX512VNNIINTRIN_H_INCLUDED */
145