1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 /* Implemented from the specification included in the Intel C++ Compiler
25 User Guide and Reference, version 9.1. */
26
27 #ifndef _TMMINTRIN_H_INCLUDED
28 #define _TMMINTRIN_H_INCLUDED
29
30 #ifndef __SSSE3__
31 # error "SSSE3 instruction set not enabled"
32 #else
33
34 /* We need definitions from the SSE3, SSE2 and SSE header files*/
35 #include <pmmintrin.h>
36
37 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16(__m128i __X,__m128i __Y)38 _mm_hadd_epi16 (__m128i __X, __m128i __Y)
39 {
40 return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
41 }
42
43 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32(__m128i __X,__m128i __Y)44 _mm_hadd_epi32 (__m128i __X, __m128i __Y)
45 {
46 return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
47 }
48
49 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16(__m128i __X,__m128i __Y)50 _mm_hadds_epi16 (__m128i __X, __m128i __Y)
51 {
52 return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
53 }
54
55 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16(__m64 __X,__m64 __Y)56 _mm_hadd_pi16 (__m64 __X, __m64 __Y)
57 {
58 return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
59 }
60
61 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32(__m64 __X,__m64 __Y)62 _mm_hadd_pi32 (__m64 __X, __m64 __Y)
63 {
64 return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
65 }
66
67 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16(__m64 __X,__m64 __Y)68 _mm_hadds_pi16 (__m64 __X, __m64 __Y)
69 {
70 return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
71 }
72
73 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16(__m128i __X,__m128i __Y)74 _mm_hsub_epi16 (__m128i __X, __m128i __Y)
75 {
76 return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
77 }
78
79 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32(__m128i __X,__m128i __Y)80 _mm_hsub_epi32 (__m128i __X, __m128i __Y)
81 {
82 return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
83 }
84
85 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16(__m128i __X,__m128i __Y)86 _mm_hsubs_epi16 (__m128i __X, __m128i __Y)
87 {
88 return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
89 }
90
91 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16(__m64 __X,__m64 __Y)92 _mm_hsub_pi16 (__m64 __X, __m64 __Y)
93 {
94 return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
95 }
96
97 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32(__m64 __X,__m64 __Y)98 _mm_hsub_pi32 (__m64 __X, __m64 __Y)
99 {
100 return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
101 }
102
103 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16(__m64 __X,__m64 __Y)104 _mm_hsubs_pi16 (__m64 __X, __m64 __Y)
105 {
106 return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
107 }
108
109 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16(__m128i __X,__m128i __Y)110 _mm_maddubs_epi16 (__m128i __X, __m128i __Y)
111 {
112 return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
113 }
114
115 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16(__m64 __X,__m64 __Y)116 _mm_maddubs_pi16 (__m64 __X, __m64 __Y)
117 {
118 return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
119 }
120
121 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16(__m128i __X,__m128i __Y)122 _mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
123 {
124 return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
125 }
126
127 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16(__m64 __X,__m64 __Y)128 _mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
129 {
130 return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
131 }
132
133 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8(__m128i __X,__m128i __Y)134 _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
135 {
136 return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
137 }
138
139 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8(__m64 __X,__m64 __Y)140 _mm_shuffle_pi8 (__m64 __X, __m64 __Y)
141 {
142 return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
143 }
144
145 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8(__m128i __X,__m128i __Y)146 _mm_sign_epi8 (__m128i __X, __m128i __Y)
147 {
148 return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
149 }
150
151 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16(__m128i __X,__m128i __Y)152 _mm_sign_epi16 (__m128i __X, __m128i __Y)
153 {
154 return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
155 }
156
157 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32(__m128i __X,__m128i __Y)158 _mm_sign_epi32 (__m128i __X, __m128i __Y)
159 {
160 return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
161 }
162
163 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8(__m64 __X,__m64 __Y)164 _mm_sign_pi8 (__m64 __X, __m64 __Y)
165 {
166 return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
167 }
168
169 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16(__m64 __X,__m64 __Y)170 _mm_sign_pi16 (__m64 __X, __m64 __Y)
171 {
172 return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
173 }
174
175 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32(__m64 __X,__m64 __Y)176 _mm_sign_pi32 (__m64 __X, __m64 __Y)
177 {
178 return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
179 }
180
181 #ifdef __OPTIMIZE__
182 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8(__m128i __X,__m128i __Y,const int __N)183 _mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
184 {
185 return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
186 (__v2di)__Y, __N * 8);
187 }
188
189 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8(__m64 __X,__m64 __Y,const int __N)190 _mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
191 {
192 return (__m64) __builtin_ia32_palignr ((__v1di)__X,
193 (__v1di)__Y, __N * 8);
194 }
195 #else
196 #define _mm_alignr_epi8(X, Y, N) \
197 ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X), \
198 (__v2di)(__m128i)(Y), \
199 (int)(N) * 8))
200 #define _mm_alignr_pi8(X, Y, N) \
201 ((__m64) __builtin_ia32_palignr ((__v1di)(__m64)(X), \
202 (__v1di)(__m64)(Y), \
203 (int)(N) * 8))
204 #endif
205
206 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8(__m128i __X)207 _mm_abs_epi8 (__m128i __X)
208 {
209 return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
210 }
211
212 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16(__m128i __X)213 _mm_abs_epi16 (__m128i __X)
214 {
215 return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
216 }
217
218 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32(__m128i __X)219 _mm_abs_epi32 (__m128i __X)
220 {
221 return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
222 }
223
224 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8(__m64 __X)225 _mm_abs_pi8 (__m64 __X)
226 {
227 return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
228 }
229
230 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16(__m64 __X)231 _mm_abs_pi16 (__m64 __X)
232 {
233 return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
234 }
235
236 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32(__m64 __X)237 _mm_abs_pi32 (__m64 __X)
238 {
239 return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
240 }
241
242 #endif /* __SSSE3__ */
243
244 #endif /* _TMMINTRIN_H_INCLUDED */
245