/* xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/tmmintrin.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523) */
/* Copyright (C) 2006-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.1.  */

27*38fd1498Szrj #ifndef _TMMINTRIN_H_INCLUDED
28*38fd1498Szrj #define _TMMINTRIN_H_INCLUDED
29*38fd1498Szrj 
30*38fd1498Szrj /* We need definitions from the SSE3, SSE2 and SSE header files*/
31*38fd1498Szrj #include <pmmintrin.h>
32*38fd1498Szrj 
/* If the translation unit is not already being compiled with SSSE3
   enabled, save the current options and switch the target to "ssse3"
   for the definitions in this header.  __DISABLE_SSSE3__ records that
   the saved options have to be restored afterwards.  */
#ifndef __SSSE3__
#pragma GCC push_options
#pragma GCC target("ssse3")
#define __DISABLE_SSSE3__
#endif /* __SSSE3__ */

39*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16(__m128i __X,__m128i __Y)40*38fd1498Szrj _mm_hadd_epi16 (__m128i __X, __m128i __Y)
41*38fd1498Szrj {
42*38fd1498Szrj   return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
43*38fd1498Szrj }
44*38fd1498Szrj 
45*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32(__m128i __X,__m128i __Y)46*38fd1498Szrj _mm_hadd_epi32 (__m128i __X, __m128i __Y)
47*38fd1498Szrj {
48*38fd1498Szrj   return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
49*38fd1498Szrj }
50*38fd1498Szrj 
51*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16(__m128i __X,__m128i __Y)52*38fd1498Szrj _mm_hadds_epi16 (__m128i __X, __m128i __Y)
53*38fd1498Szrj {
54*38fd1498Szrj   return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
55*38fd1498Szrj }
56*38fd1498Szrj 
57*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16(__m64 __X,__m64 __Y)58*38fd1498Szrj _mm_hadd_pi16 (__m64 __X, __m64 __Y)
59*38fd1498Szrj {
60*38fd1498Szrj   return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
61*38fd1498Szrj }
62*38fd1498Szrj 
63*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32(__m64 __X,__m64 __Y)64*38fd1498Szrj _mm_hadd_pi32 (__m64 __X, __m64 __Y)
65*38fd1498Szrj {
66*38fd1498Szrj   return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
67*38fd1498Szrj }
68*38fd1498Szrj 
69*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16(__m64 __X,__m64 __Y)70*38fd1498Szrj _mm_hadds_pi16 (__m64 __X, __m64 __Y)
71*38fd1498Szrj {
72*38fd1498Szrj   return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
73*38fd1498Szrj }
74*38fd1498Szrj 
75*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16(__m128i __X,__m128i __Y)76*38fd1498Szrj _mm_hsub_epi16 (__m128i __X, __m128i __Y)
77*38fd1498Szrj {
78*38fd1498Szrj   return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
79*38fd1498Szrj }
80*38fd1498Szrj 
81*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32(__m128i __X,__m128i __Y)82*38fd1498Szrj _mm_hsub_epi32 (__m128i __X, __m128i __Y)
83*38fd1498Szrj {
84*38fd1498Szrj   return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
85*38fd1498Szrj }
86*38fd1498Szrj 
87*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16(__m128i __X,__m128i __Y)88*38fd1498Szrj _mm_hsubs_epi16 (__m128i __X, __m128i __Y)
89*38fd1498Szrj {
90*38fd1498Szrj   return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
91*38fd1498Szrj }
92*38fd1498Szrj 
93*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16(__m64 __X,__m64 __Y)94*38fd1498Szrj _mm_hsub_pi16 (__m64 __X, __m64 __Y)
95*38fd1498Szrj {
96*38fd1498Szrj   return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
97*38fd1498Szrj }
98*38fd1498Szrj 
99*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32(__m64 __X,__m64 __Y)100*38fd1498Szrj _mm_hsub_pi32 (__m64 __X, __m64 __Y)
101*38fd1498Szrj {
102*38fd1498Szrj   return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
103*38fd1498Szrj }
104*38fd1498Szrj 
105*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16(__m64 __X,__m64 __Y)106*38fd1498Szrj _mm_hsubs_pi16 (__m64 __X, __m64 __Y)
107*38fd1498Szrj {
108*38fd1498Szrj   return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
109*38fd1498Szrj }
110*38fd1498Szrj 
111*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16(__m128i __X,__m128i __Y)112*38fd1498Szrj _mm_maddubs_epi16 (__m128i __X, __m128i __Y)
113*38fd1498Szrj {
114*38fd1498Szrj   return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
115*38fd1498Szrj }
116*38fd1498Szrj 
117*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16(__m64 __X,__m64 __Y)118*38fd1498Szrj _mm_maddubs_pi16 (__m64 __X, __m64 __Y)
119*38fd1498Szrj {
120*38fd1498Szrj   return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
121*38fd1498Szrj }
122*38fd1498Szrj 
123*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16(__m128i __X,__m128i __Y)124*38fd1498Szrj _mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
125*38fd1498Szrj {
126*38fd1498Szrj   return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
127*38fd1498Szrj }
128*38fd1498Szrj 
129*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16(__m64 __X,__m64 __Y)130*38fd1498Szrj _mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
131*38fd1498Szrj {
132*38fd1498Szrj   return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
133*38fd1498Szrj }
134*38fd1498Szrj 
135*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8(__m128i __X,__m128i __Y)136*38fd1498Szrj _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
137*38fd1498Szrj {
138*38fd1498Szrj   return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
139*38fd1498Szrj }
140*38fd1498Szrj 
141*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8(__m64 __X,__m64 __Y)142*38fd1498Szrj _mm_shuffle_pi8 (__m64 __X, __m64 __Y)
143*38fd1498Szrj {
144*38fd1498Szrj   return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
145*38fd1498Szrj }
146*38fd1498Szrj 
147*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8(__m128i __X,__m128i __Y)148*38fd1498Szrj _mm_sign_epi8 (__m128i __X, __m128i __Y)
149*38fd1498Szrj {
150*38fd1498Szrj   return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
151*38fd1498Szrj }
152*38fd1498Szrj 
153*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16(__m128i __X,__m128i __Y)154*38fd1498Szrj _mm_sign_epi16 (__m128i __X, __m128i __Y)
155*38fd1498Szrj {
156*38fd1498Szrj   return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
157*38fd1498Szrj }
158*38fd1498Szrj 
159*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32(__m128i __X,__m128i __Y)160*38fd1498Szrj _mm_sign_epi32 (__m128i __X, __m128i __Y)
161*38fd1498Szrj {
162*38fd1498Szrj   return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
163*38fd1498Szrj }
164*38fd1498Szrj 
165*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8(__m64 __X,__m64 __Y)166*38fd1498Szrj _mm_sign_pi8 (__m64 __X, __m64 __Y)
167*38fd1498Szrj {
168*38fd1498Szrj   return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
169*38fd1498Szrj }
170*38fd1498Szrj 
171*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16(__m64 __X,__m64 __Y)172*38fd1498Szrj _mm_sign_pi16 (__m64 __X, __m64 __Y)
173*38fd1498Szrj {
174*38fd1498Szrj   return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
175*38fd1498Szrj }
176*38fd1498Szrj 
177*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32(__m64 __X,__m64 __Y)178*38fd1498Szrj _mm_sign_pi32 (__m64 __X, __m64 __Y)
179*38fd1498Szrj {
180*38fd1498Szrj   return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
181*38fd1498Szrj }
182*38fd1498Szrj 
/* _mm_alignr_epi8 / _mm_alignr_pi8: concatenate __X (upper half) and
   __Y (lower half), shift the result right by __N bytes and return the
   low 128 (respectively 64) bits (PALIGNR).  The builtins take the
   shift count in bits, hence the multiplication by 8.

   The count has to reach the builtin as a compile-time constant.  When
   optimizing, the always-inline functions are used; otherwise the
   macro forms below pass the argument through directly.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8 (__m128i __X, __m128i __Y, const int __N)
{
  return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
					      (__v2di)__Y, __N * 8);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8 (__m64 __X, __m64 __Y, const int __N)
{
  return (__m64) __builtin_ia32_palignr ((__v1di)__X,
					 (__v1di)__Y, __N * 8);
}
#else
#define _mm_alignr_epi8(X, Y, N)					\
  ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X),		\
					(__v2di)(__m128i)(Y),		\
					(int)(N) * 8))
#define _mm_alignr_pi8(X, Y, N)						\
  ((__m64) __builtin_ia32_palignr ((__v1di)(__m64)(X),			\
				   (__v1di)(__m64)(Y),			\
				   (int)(N) * 8))
#endif

208*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8(__m128i __X)209*38fd1498Szrj _mm_abs_epi8 (__m128i __X)
210*38fd1498Szrj {
211*38fd1498Szrj   return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
212*38fd1498Szrj }
213*38fd1498Szrj 
214*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16(__m128i __X)215*38fd1498Szrj _mm_abs_epi16 (__m128i __X)
216*38fd1498Szrj {
217*38fd1498Szrj   return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
218*38fd1498Szrj }
219*38fd1498Szrj 
220*38fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32(__m128i __X)221*38fd1498Szrj _mm_abs_epi32 (__m128i __X)
222*38fd1498Szrj {
223*38fd1498Szrj   return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
224*38fd1498Szrj }
225*38fd1498Szrj 
226*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8(__m64 __X)227*38fd1498Szrj _mm_abs_pi8 (__m64 __X)
228*38fd1498Szrj {
229*38fd1498Szrj   return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
230*38fd1498Szrj }
231*38fd1498Szrj 
232*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16(__m64 __X)233*38fd1498Szrj _mm_abs_pi16 (__m64 __X)
234*38fd1498Szrj {
235*38fd1498Szrj   return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
236*38fd1498Szrj }
237*38fd1498Szrj 
238*38fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32(__m64 __X)239*38fd1498Szrj _mm_abs_pi32 (__m64 __X)
240*38fd1498Szrj {
241*38fd1498Szrj   return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
242*38fd1498Szrj }
243*38fd1498Szrj 
/* Restore the options saved at the top of this header, but only if
   this header was the one that switched the target to SSSE3.  */
#ifdef __DISABLE_SSSE3__
#undef __DISABLE_SSSE3__
#pragma GCC pop_options
#endif /* __DISABLE_SSSE3__ */

249*38fd1498Szrj #endif /* _TMMINTRIN_H_INCLUDED */
250