/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __TMMINTRIN_H
#define __TMMINTRIN_H

#ifndef __SSSE3__
#error "SSSE3 instruction set not enabled"
#else

#include <pmmintrin.h>

33*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi8(__m64 __a)34*f4a2713aSLionel Sambuc _mm_abs_pi8(__m64 __a)
35*f4a2713aSLionel Sambuc {
36*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
37*f4a2713aSLionel Sambuc }
38*f4a2713aSLionel Sambuc
39*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi8(__m128i __a)40*f4a2713aSLionel Sambuc _mm_abs_epi8(__m128i __a)
41*f4a2713aSLionel Sambuc {
42*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
43*f4a2713aSLionel Sambuc }
44*f4a2713aSLionel Sambuc
45*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi16(__m64 __a)46*f4a2713aSLionel Sambuc _mm_abs_pi16(__m64 __a)
47*f4a2713aSLionel Sambuc {
48*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
49*f4a2713aSLionel Sambuc }
50*f4a2713aSLionel Sambuc
51*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi16(__m128i __a)52*f4a2713aSLionel Sambuc _mm_abs_epi16(__m128i __a)
53*f4a2713aSLionel Sambuc {
54*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
55*f4a2713aSLionel Sambuc }
56*f4a2713aSLionel Sambuc
57*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi32(__m64 __a)58*f4a2713aSLionel Sambuc _mm_abs_pi32(__m64 __a)
59*f4a2713aSLionel Sambuc {
60*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pabsd((__v2si)__a);
61*f4a2713aSLionel Sambuc }
62*f4a2713aSLionel Sambuc
63*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi32(__m128i __a)64*f4a2713aSLionel Sambuc _mm_abs_epi32(__m128i __a)
65*f4a2713aSLionel Sambuc {
66*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
67*f4a2713aSLionel Sambuc }
68*f4a2713aSLionel Sambuc
/*
 * _mm_alignr_epi8(a, b, n) -- PALIGNR, 128-bit form.
 *
 * Concatenates a (upper half) and b (lower half) into a 32-byte value,
 * shifts it right by n bytes and yields the low 16 bytes as an __m128i.
 *
 * a, b: 128-bit integer vectors; each is evaluated exactly once.
 * n:    shift count, forwarded unchanged to __builtin_ia32_palignr128.
 *
 * This is a macro rather than an inline function so that n reaches the
 * builtin as written.  It expands to a single expression with no local
 * temporaries: a declaration such as "__m128i __a = (a);" would initialise
 * the temporary from itself whenever the caller's argument is also named
 * __a or __b.
 */
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))

/*
 * _mm_alignr_pi8(a, b, n) -- PALIGNR, 64-bit form.
 *
 * Concatenates a (upper half) and b (lower half) into a 16-byte value,
 * shifts it right by n bytes and yields the low 8 bytes as an __m64.
 *
 * a, b: 64-bit integer vectors; each is evaluated exactly once.
 * n:    shift count, forwarded unchanged to __builtin_ia32_palignr.
 *
 * This is a macro rather than an inline function so that n reaches the
 * builtin as written.  It expands to a single expression with no local
 * temporaries: a declaration such as "__m64 __a = (a);" would initialise
 * the temporary from itself whenever the caller's argument is also named
 * __a or __b.
 */
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), \
                                 (__v8qi)(__m64)(b), (n)))

79*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadd_epi16(__m128i __a,__m128i __b)80*f4a2713aSLionel Sambuc _mm_hadd_epi16(__m128i __a, __m128i __b)
81*f4a2713aSLionel Sambuc {
82*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
83*f4a2713aSLionel Sambuc }
84*f4a2713aSLionel Sambuc
85*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadd_epi32(__m128i __a,__m128i __b)86*f4a2713aSLionel Sambuc _mm_hadd_epi32(__m128i __a, __m128i __b)
87*f4a2713aSLionel Sambuc {
88*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
89*f4a2713aSLionel Sambuc }
90*f4a2713aSLionel Sambuc
91*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi16(__m64 __a,__m64 __b)92*f4a2713aSLionel Sambuc _mm_hadd_pi16(__m64 __a, __m64 __b)
93*f4a2713aSLionel Sambuc {
94*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
95*f4a2713aSLionel Sambuc }
96*f4a2713aSLionel Sambuc
97*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi32(__m64 __a,__m64 __b)98*f4a2713aSLionel Sambuc _mm_hadd_pi32(__m64 __a, __m64 __b)
99*f4a2713aSLionel Sambuc {
100*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
101*f4a2713aSLionel Sambuc }
102*f4a2713aSLionel Sambuc
103*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadds_epi16(__m128i __a,__m128i __b)104*f4a2713aSLionel Sambuc _mm_hadds_epi16(__m128i __a, __m128i __b)
105*f4a2713aSLionel Sambuc {
106*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
107*f4a2713aSLionel Sambuc }
108*f4a2713aSLionel Sambuc
109*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadds_pi16(__m64 __a,__m64 __b)110*f4a2713aSLionel Sambuc _mm_hadds_pi16(__m64 __a, __m64 __b)
111*f4a2713aSLionel Sambuc {
112*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
113*f4a2713aSLionel Sambuc }
114*f4a2713aSLionel Sambuc
115*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsub_epi16(__m128i __a,__m128i __b)116*f4a2713aSLionel Sambuc _mm_hsub_epi16(__m128i __a, __m128i __b)
117*f4a2713aSLionel Sambuc {
118*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
119*f4a2713aSLionel Sambuc }
120*f4a2713aSLionel Sambuc
121*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsub_epi32(__m128i __a,__m128i __b)122*f4a2713aSLionel Sambuc _mm_hsub_epi32(__m128i __a, __m128i __b)
123*f4a2713aSLionel Sambuc {
124*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
125*f4a2713aSLionel Sambuc }
126*f4a2713aSLionel Sambuc
127*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi16(__m64 __a,__m64 __b)128*f4a2713aSLionel Sambuc _mm_hsub_pi16(__m64 __a, __m64 __b)
129*f4a2713aSLionel Sambuc {
130*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
131*f4a2713aSLionel Sambuc }
132*f4a2713aSLionel Sambuc
133*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi32(__m64 __a,__m64 __b)134*f4a2713aSLionel Sambuc _mm_hsub_pi32(__m64 __a, __m64 __b)
135*f4a2713aSLionel Sambuc {
136*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
137*f4a2713aSLionel Sambuc }
138*f4a2713aSLionel Sambuc
139*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubs_epi16(__m128i __a,__m128i __b)140*f4a2713aSLionel Sambuc _mm_hsubs_epi16(__m128i __a, __m128i __b)
141*f4a2713aSLionel Sambuc {
142*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
143*f4a2713aSLionel Sambuc }
144*f4a2713aSLionel Sambuc
145*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsubs_pi16(__m64 __a,__m64 __b)146*f4a2713aSLionel Sambuc _mm_hsubs_pi16(__m64 __a, __m64 __b)
147*f4a2713aSLionel Sambuc {
148*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
149*f4a2713aSLionel Sambuc }
150*f4a2713aSLionel Sambuc
151*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maddubs_epi16(__m128i __a,__m128i __b)152*f4a2713aSLionel Sambuc _mm_maddubs_epi16(__m128i __a, __m128i __b)
153*f4a2713aSLionel Sambuc {
154*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
155*f4a2713aSLionel Sambuc }
156*f4a2713aSLionel Sambuc
157*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_maddubs_pi16(__m64 __a,__m64 __b)158*f4a2713aSLionel Sambuc _mm_maddubs_pi16(__m64 __a, __m64 __b)
159*f4a2713aSLionel Sambuc {
160*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
161*f4a2713aSLionel Sambuc }
162*f4a2713aSLionel Sambuc
163*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_mulhrs_epi16(__m128i __a,__m128i __b)164*f4a2713aSLionel Sambuc _mm_mulhrs_epi16(__m128i __a, __m128i __b)
165*f4a2713aSLionel Sambuc {
166*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
167*f4a2713aSLionel Sambuc }
168*f4a2713aSLionel Sambuc
169*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_mulhrs_pi16(__m64 __a,__m64 __b)170*f4a2713aSLionel Sambuc _mm_mulhrs_pi16(__m64 __a, __m64 __b)
171*f4a2713aSLionel Sambuc {
172*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
173*f4a2713aSLionel Sambuc }
174*f4a2713aSLionel Sambuc
175*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shuffle_epi8(__m128i __a,__m128i __b)176*f4a2713aSLionel Sambuc _mm_shuffle_epi8(__m128i __a, __m128i __b)
177*f4a2713aSLionel Sambuc {
178*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
179*f4a2713aSLionel Sambuc }
180*f4a2713aSLionel Sambuc
181*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_shuffle_pi8(__m64 __a,__m64 __b)182*f4a2713aSLionel Sambuc _mm_shuffle_pi8(__m64 __a, __m64 __b)
183*f4a2713aSLionel Sambuc {
184*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
185*f4a2713aSLionel Sambuc }
186*f4a2713aSLionel Sambuc
187*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi8(__m128i __a,__m128i __b)188*f4a2713aSLionel Sambuc _mm_sign_epi8(__m128i __a, __m128i __b)
189*f4a2713aSLionel Sambuc {
190*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
191*f4a2713aSLionel Sambuc }
192*f4a2713aSLionel Sambuc
193*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi16(__m128i __a,__m128i __b)194*f4a2713aSLionel Sambuc _mm_sign_epi16(__m128i __a, __m128i __b)
195*f4a2713aSLionel Sambuc {
196*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
197*f4a2713aSLionel Sambuc }
198*f4a2713aSLionel Sambuc
199*f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi32(__m128i __a,__m128i __b)200*f4a2713aSLionel Sambuc _mm_sign_epi32(__m128i __a, __m128i __b)
201*f4a2713aSLionel Sambuc {
202*f4a2713aSLionel Sambuc return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
203*f4a2713aSLionel Sambuc }
204*f4a2713aSLionel Sambuc
205*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi8(__m64 __a,__m64 __b)206*f4a2713aSLionel Sambuc _mm_sign_pi8(__m64 __a, __m64 __b)
207*f4a2713aSLionel Sambuc {
208*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
209*f4a2713aSLionel Sambuc }
210*f4a2713aSLionel Sambuc
211*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi16(__m64 __a,__m64 __b)212*f4a2713aSLionel Sambuc _mm_sign_pi16(__m64 __a, __m64 __b)
213*f4a2713aSLionel Sambuc {
214*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
215*f4a2713aSLionel Sambuc }
216*f4a2713aSLionel Sambuc
217*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi32(__m64 __a,__m64 __b)218*f4a2713aSLionel Sambuc _mm_sign_pi32(__m64 __a, __m64 __b)
219*f4a2713aSLionel Sambuc {
220*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
221*f4a2713aSLionel Sambuc }
222*f4a2713aSLionel Sambuc
#endif /* __SSSE3__ */

#endif /* __TMMINTRIN_H */