/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __MMINTRIN_H
#define __MMINTRIN_H

#ifndef __MMX__
#error "MMX instruction set not enabled"
#else

/* 64-bit MMX value type: a one-element vector of long long (8 bytes). */
typedef long long __m64 __attribute__((__vector_size__(8)));

/* Lane views of the same 8 bytes, used when calling the MMX builtins:
 * two ints, four shorts, or eight chars. */
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Issues the EMMS builtin.  Per Intel's documentation, EMMS marks the x87
 * register tags empty so x87 floating-point code can follow MMX code. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_empty(void)
{
  __builtin_ia32_emms();
}

43*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi32_si64(int __i)44*f4a2713aSLionel Sambuc _mm_cvtsi32_si64(int __i)
45*f4a2713aSLionel Sambuc {
46*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
47*f4a2713aSLionel Sambuc }
48*f4a2713aSLionel Sambuc
49*f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi64_si32(__m64 __m)50*f4a2713aSLionel Sambuc _mm_cvtsi64_si32(__m64 __m)
51*f4a2713aSLionel Sambuc {
52*f4a2713aSLionel Sambuc return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
53*f4a2713aSLionel Sambuc }
54*f4a2713aSLionel Sambuc
55*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi64_m64(long long __i)56*f4a2713aSLionel Sambuc _mm_cvtsi64_m64(long long __i)
57*f4a2713aSLionel Sambuc {
58*f4a2713aSLionel Sambuc return (__m64)__i;
59*f4a2713aSLionel Sambuc }
60*f4a2713aSLionel Sambuc
61*f4a2713aSLionel Sambuc static __inline__ long long __attribute__((__always_inline__, __nodebug__))
_mm_cvtm64_si64(__m64 __m)62*f4a2713aSLionel Sambuc _mm_cvtm64_si64(__m64 __m)
63*f4a2713aSLionel Sambuc {
64*f4a2713aSLionel Sambuc return (long long)__m;
65*f4a2713aSLionel Sambuc }
66*f4a2713aSLionel Sambuc
67*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_packs_pi16(__m64 __m1,__m64 __m2)68*f4a2713aSLionel Sambuc _mm_packs_pi16(__m64 __m1, __m64 __m2)
69*f4a2713aSLionel Sambuc {
70*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
71*f4a2713aSLionel Sambuc }
72*f4a2713aSLionel Sambuc
73*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_packs_pi32(__m64 __m1,__m64 __m2)74*f4a2713aSLionel Sambuc _mm_packs_pi32(__m64 __m1, __m64 __m2)
75*f4a2713aSLionel Sambuc {
76*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
77*f4a2713aSLionel Sambuc }
78*f4a2713aSLionel Sambuc
79*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_packs_pu16(__m64 __m1,__m64 __m2)80*f4a2713aSLionel Sambuc _mm_packs_pu16(__m64 __m1, __m64 __m2)
81*f4a2713aSLionel Sambuc {
82*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
83*f4a2713aSLionel Sambuc }
84*f4a2713aSLionel Sambuc
85*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_unpackhi_pi8(__m64 __m1,__m64 __m2)86*f4a2713aSLionel Sambuc _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
87*f4a2713aSLionel Sambuc {
88*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
89*f4a2713aSLionel Sambuc }
90*f4a2713aSLionel Sambuc
91*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_unpackhi_pi16(__m64 __m1,__m64 __m2)92*f4a2713aSLionel Sambuc _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
93*f4a2713aSLionel Sambuc {
94*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
95*f4a2713aSLionel Sambuc }
96*f4a2713aSLionel Sambuc
97*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_unpackhi_pi32(__m64 __m1,__m64 __m2)98*f4a2713aSLionel Sambuc _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
99*f4a2713aSLionel Sambuc {
100*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
101*f4a2713aSLionel Sambuc }
102*f4a2713aSLionel Sambuc
103*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_unpacklo_pi8(__m64 __m1,__m64 __m2)104*f4a2713aSLionel Sambuc _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
105*f4a2713aSLionel Sambuc {
106*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
107*f4a2713aSLionel Sambuc }
108*f4a2713aSLionel Sambuc
109*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_unpacklo_pi16(__m64 __m1,__m64 __m2)110*f4a2713aSLionel Sambuc _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
111*f4a2713aSLionel Sambuc {
112*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
113*f4a2713aSLionel Sambuc }
114*f4a2713aSLionel Sambuc
115*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_unpacklo_pi32(__m64 __m1,__m64 __m2)116*f4a2713aSLionel Sambuc _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
117*f4a2713aSLionel Sambuc {
118*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
119*f4a2713aSLionel Sambuc }
120*f4a2713aSLionel Sambuc
121*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_add_pi8(__m64 __m1,__m64 __m2)122*f4a2713aSLionel Sambuc _mm_add_pi8(__m64 __m1, __m64 __m2)
123*f4a2713aSLionel Sambuc {
124*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
125*f4a2713aSLionel Sambuc }
126*f4a2713aSLionel Sambuc
127*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_add_pi16(__m64 __m1,__m64 __m2)128*f4a2713aSLionel Sambuc _mm_add_pi16(__m64 __m1, __m64 __m2)
129*f4a2713aSLionel Sambuc {
130*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
131*f4a2713aSLionel Sambuc }
132*f4a2713aSLionel Sambuc
133*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_add_pi32(__m64 __m1,__m64 __m2)134*f4a2713aSLionel Sambuc _mm_add_pi32(__m64 __m1, __m64 __m2)
135*f4a2713aSLionel Sambuc {
136*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
137*f4a2713aSLionel Sambuc }
138*f4a2713aSLionel Sambuc
139*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_adds_pi8(__m64 __m1,__m64 __m2)140*f4a2713aSLionel Sambuc _mm_adds_pi8(__m64 __m1, __m64 __m2)
141*f4a2713aSLionel Sambuc {
142*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
143*f4a2713aSLionel Sambuc }
144*f4a2713aSLionel Sambuc
145*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_adds_pi16(__m64 __m1,__m64 __m2)146*f4a2713aSLionel Sambuc _mm_adds_pi16(__m64 __m1, __m64 __m2)
147*f4a2713aSLionel Sambuc {
148*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
149*f4a2713aSLionel Sambuc }
150*f4a2713aSLionel Sambuc
151*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_adds_pu8(__m64 __m1,__m64 __m2)152*f4a2713aSLionel Sambuc _mm_adds_pu8(__m64 __m1, __m64 __m2)
153*f4a2713aSLionel Sambuc {
154*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
155*f4a2713aSLionel Sambuc }
156*f4a2713aSLionel Sambuc
157*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_adds_pu16(__m64 __m1,__m64 __m2)158*f4a2713aSLionel Sambuc _mm_adds_pu16(__m64 __m1, __m64 __m2)
159*f4a2713aSLionel Sambuc {
160*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
161*f4a2713aSLionel Sambuc }
162*f4a2713aSLionel Sambuc
163*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sub_pi8(__m64 __m1,__m64 __m2)164*f4a2713aSLionel Sambuc _mm_sub_pi8(__m64 __m1, __m64 __m2)
165*f4a2713aSLionel Sambuc {
166*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
167*f4a2713aSLionel Sambuc }
168*f4a2713aSLionel Sambuc
169*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sub_pi16(__m64 __m1,__m64 __m2)170*f4a2713aSLionel Sambuc _mm_sub_pi16(__m64 __m1, __m64 __m2)
171*f4a2713aSLionel Sambuc {
172*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
173*f4a2713aSLionel Sambuc }
174*f4a2713aSLionel Sambuc
175*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sub_pi32(__m64 __m1,__m64 __m2)176*f4a2713aSLionel Sambuc _mm_sub_pi32(__m64 __m1, __m64 __m2)
177*f4a2713aSLionel Sambuc {
178*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
179*f4a2713aSLionel Sambuc }
180*f4a2713aSLionel Sambuc
181*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_subs_pi8(__m64 __m1,__m64 __m2)182*f4a2713aSLionel Sambuc _mm_subs_pi8(__m64 __m1, __m64 __m2)
183*f4a2713aSLionel Sambuc {
184*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
185*f4a2713aSLionel Sambuc }
186*f4a2713aSLionel Sambuc
187*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_subs_pi16(__m64 __m1,__m64 __m2)188*f4a2713aSLionel Sambuc _mm_subs_pi16(__m64 __m1, __m64 __m2)
189*f4a2713aSLionel Sambuc {
190*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
191*f4a2713aSLionel Sambuc }
192*f4a2713aSLionel Sambuc
193*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_subs_pu8(__m64 __m1,__m64 __m2)194*f4a2713aSLionel Sambuc _mm_subs_pu8(__m64 __m1, __m64 __m2)
195*f4a2713aSLionel Sambuc {
196*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
197*f4a2713aSLionel Sambuc }
198*f4a2713aSLionel Sambuc
199*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_subs_pu16(__m64 __m1,__m64 __m2)200*f4a2713aSLionel Sambuc _mm_subs_pu16(__m64 __m1, __m64 __m2)
201*f4a2713aSLionel Sambuc {
202*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
203*f4a2713aSLionel Sambuc }
204*f4a2713aSLionel Sambuc
205*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_madd_pi16(__m64 __m1,__m64 __m2)206*f4a2713aSLionel Sambuc _mm_madd_pi16(__m64 __m1, __m64 __m2)
207*f4a2713aSLionel Sambuc {
208*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
209*f4a2713aSLionel Sambuc }
210*f4a2713aSLionel Sambuc
211*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_mulhi_pi16(__m64 __m1,__m64 __m2)212*f4a2713aSLionel Sambuc _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
213*f4a2713aSLionel Sambuc {
214*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
215*f4a2713aSLionel Sambuc }
216*f4a2713aSLionel Sambuc
217*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_mullo_pi16(__m64 __m1,__m64 __m2)218*f4a2713aSLionel Sambuc _mm_mullo_pi16(__m64 __m1, __m64 __m2)
219*f4a2713aSLionel Sambuc {
220*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
221*f4a2713aSLionel Sambuc }
222*f4a2713aSLionel Sambuc
223*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sll_pi16(__m64 __m,__m64 __count)224*f4a2713aSLionel Sambuc _mm_sll_pi16(__m64 __m, __m64 __count)
225*f4a2713aSLionel Sambuc {
226*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
227*f4a2713aSLionel Sambuc }
228*f4a2713aSLionel Sambuc
229*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_slli_pi16(__m64 __m,int __count)230*f4a2713aSLionel Sambuc _mm_slli_pi16(__m64 __m, int __count)
231*f4a2713aSLionel Sambuc {
232*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
233*f4a2713aSLionel Sambuc }
234*f4a2713aSLionel Sambuc
235*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sll_pi32(__m64 __m,__m64 __count)236*f4a2713aSLionel Sambuc _mm_sll_pi32(__m64 __m, __m64 __count)
237*f4a2713aSLionel Sambuc {
238*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
239*f4a2713aSLionel Sambuc }
240*f4a2713aSLionel Sambuc
241*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_slli_pi32(__m64 __m,int __count)242*f4a2713aSLionel Sambuc _mm_slli_pi32(__m64 __m, int __count)
243*f4a2713aSLionel Sambuc {
244*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
245*f4a2713aSLionel Sambuc }
246*f4a2713aSLionel Sambuc
247*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sll_si64(__m64 __m,__m64 __count)248*f4a2713aSLionel Sambuc _mm_sll_si64(__m64 __m, __m64 __count)
249*f4a2713aSLionel Sambuc {
250*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psllq(__m, __count);
251*f4a2713aSLionel Sambuc }
252*f4a2713aSLionel Sambuc
253*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_slli_si64(__m64 __m,int __count)254*f4a2713aSLionel Sambuc _mm_slli_si64(__m64 __m, int __count)
255*f4a2713aSLionel Sambuc {
256*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psllqi(__m, __count);
257*f4a2713aSLionel Sambuc }
258*f4a2713aSLionel Sambuc
259*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sra_pi16(__m64 __m,__m64 __count)260*f4a2713aSLionel Sambuc _mm_sra_pi16(__m64 __m, __m64 __count)
261*f4a2713aSLionel Sambuc {
262*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
263*f4a2713aSLionel Sambuc }
264*f4a2713aSLionel Sambuc
265*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srai_pi16(__m64 __m,int __count)266*f4a2713aSLionel Sambuc _mm_srai_pi16(__m64 __m, int __count)
267*f4a2713aSLionel Sambuc {
268*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
269*f4a2713aSLionel Sambuc }
270*f4a2713aSLionel Sambuc
271*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sra_pi32(__m64 __m,__m64 __count)272*f4a2713aSLionel Sambuc _mm_sra_pi32(__m64 __m, __m64 __count)
273*f4a2713aSLionel Sambuc {
274*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
275*f4a2713aSLionel Sambuc }
276*f4a2713aSLionel Sambuc
277*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srai_pi32(__m64 __m,int __count)278*f4a2713aSLionel Sambuc _mm_srai_pi32(__m64 __m, int __count)
279*f4a2713aSLionel Sambuc {
280*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
281*f4a2713aSLionel Sambuc }
282*f4a2713aSLionel Sambuc
283*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srl_pi16(__m64 __m,__m64 __count)284*f4a2713aSLionel Sambuc _mm_srl_pi16(__m64 __m, __m64 __count)
285*f4a2713aSLionel Sambuc {
286*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
287*f4a2713aSLionel Sambuc }
288*f4a2713aSLionel Sambuc
289*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srli_pi16(__m64 __m,int __count)290*f4a2713aSLionel Sambuc _mm_srli_pi16(__m64 __m, int __count)
291*f4a2713aSLionel Sambuc {
292*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
293*f4a2713aSLionel Sambuc }
294*f4a2713aSLionel Sambuc
295*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srl_pi32(__m64 __m,__m64 __count)296*f4a2713aSLionel Sambuc _mm_srl_pi32(__m64 __m, __m64 __count)
297*f4a2713aSLionel Sambuc {
298*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
299*f4a2713aSLionel Sambuc }
300*f4a2713aSLionel Sambuc
301*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srli_pi32(__m64 __m,int __count)302*f4a2713aSLionel Sambuc _mm_srli_pi32(__m64 __m, int __count)
303*f4a2713aSLionel Sambuc {
304*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
305*f4a2713aSLionel Sambuc }
306*f4a2713aSLionel Sambuc
307*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srl_si64(__m64 __m,__m64 __count)308*f4a2713aSLionel Sambuc _mm_srl_si64(__m64 __m, __m64 __count)
309*f4a2713aSLionel Sambuc {
310*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psrlq(__m, __count);
311*f4a2713aSLionel Sambuc }
312*f4a2713aSLionel Sambuc
313*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srli_si64(__m64 __m,int __count)314*f4a2713aSLionel Sambuc _mm_srli_si64(__m64 __m, int __count)
315*f4a2713aSLionel Sambuc {
316*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_psrlqi(__m, __count);
317*f4a2713aSLionel Sambuc }
318*f4a2713aSLionel Sambuc
319*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_and_si64(__m64 __m1,__m64 __m2)320*f4a2713aSLionel Sambuc _mm_and_si64(__m64 __m1, __m64 __m2)
321*f4a2713aSLionel Sambuc {
322*f4a2713aSLionel Sambuc return __builtin_ia32_pand(__m1, __m2);
323*f4a2713aSLionel Sambuc }
324*f4a2713aSLionel Sambuc
325*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_andnot_si64(__m64 __m1,__m64 __m2)326*f4a2713aSLionel Sambuc _mm_andnot_si64(__m64 __m1, __m64 __m2)
327*f4a2713aSLionel Sambuc {
328*f4a2713aSLionel Sambuc return __builtin_ia32_pandn(__m1, __m2);
329*f4a2713aSLionel Sambuc }
330*f4a2713aSLionel Sambuc
331*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_or_si64(__m64 __m1,__m64 __m2)332*f4a2713aSLionel Sambuc _mm_or_si64(__m64 __m1, __m64 __m2)
333*f4a2713aSLionel Sambuc {
334*f4a2713aSLionel Sambuc return __builtin_ia32_por(__m1, __m2);
335*f4a2713aSLionel Sambuc }
336*f4a2713aSLionel Sambuc
337*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_xor_si64(__m64 __m1,__m64 __m2)338*f4a2713aSLionel Sambuc _mm_xor_si64(__m64 __m1, __m64 __m2)
339*f4a2713aSLionel Sambuc {
340*f4a2713aSLionel Sambuc return __builtin_ia32_pxor(__m1, __m2);
341*f4a2713aSLionel Sambuc }
342*f4a2713aSLionel Sambuc
343*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pi8(__m64 __m1,__m64 __m2)344*f4a2713aSLionel Sambuc _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
345*f4a2713aSLionel Sambuc {
346*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
347*f4a2713aSLionel Sambuc }
348*f4a2713aSLionel Sambuc
349*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pi16(__m64 __m1,__m64 __m2)350*f4a2713aSLionel Sambuc _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
351*f4a2713aSLionel Sambuc {
352*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
353*f4a2713aSLionel Sambuc }
354*f4a2713aSLionel Sambuc
355*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pi32(__m64 __m1,__m64 __m2)356*f4a2713aSLionel Sambuc _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
357*f4a2713aSLionel Sambuc {
358*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
359*f4a2713aSLionel Sambuc }
360*f4a2713aSLionel Sambuc
361*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pi8(__m64 __m1,__m64 __m2)362*f4a2713aSLionel Sambuc _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
363*f4a2713aSLionel Sambuc {
364*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
365*f4a2713aSLionel Sambuc }
366*f4a2713aSLionel Sambuc
367*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pi16(__m64 __m1,__m64 __m2)368*f4a2713aSLionel Sambuc _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
369*f4a2713aSLionel Sambuc {
370*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
371*f4a2713aSLionel Sambuc }
372*f4a2713aSLionel Sambuc
373*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pi32(__m64 __m1,__m64 __m2)374*f4a2713aSLionel Sambuc _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
375*f4a2713aSLionel Sambuc {
376*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
377*f4a2713aSLionel Sambuc }
378*f4a2713aSLionel Sambuc
379*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_setzero_si64(void)380*f4a2713aSLionel Sambuc _mm_setzero_si64(void)
381*f4a2713aSLionel Sambuc {
382*f4a2713aSLionel Sambuc return (__m64){ 0LL };
383*f4a2713aSLionel Sambuc }
384*f4a2713aSLionel Sambuc
385*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_set_pi32(int __i1,int __i0)386*f4a2713aSLionel Sambuc _mm_set_pi32(int __i1, int __i0)
387*f4a2713aSLionel Sambuc {
388*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
389*f4a2713aSLionel Sambuc }
390*f4a2713aSLionel Sambuc
391*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_set_pi16(short __s3,short __s2,short __s1,short __s0)392*f4a2713aSLionel Sambuc _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
393*f4a2713aSLionel Sambuc {
394*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
395*f4a2713aSLionel Sambuc }
396*f4a2713aSLionel Sambuc
397*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_set_pi8(char __b7,char __b6,char __b5,char __b4,char __b3,char __b2,char __b1,char __b0)398*f4a2713aSLionel Sambuc _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
399*f4a2713aSLionel Sambuc char __b1, char __b0)
400*f4a2713aSLionel Sambuc {
401*f4a2713aSLionel Sambuc return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
402*f4a2713aSLionel Sambuc __b4, __b5, __b6, __b7);
403*f4a2713aSLionel Sambuc }
404*f4a2713aSLionel Sambuc
405*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_set1_pi32(int __i)406*f4a2713aSLionel Sambuc _mm_set1_pi32(int __i)
407*f4a2713aSLionel Sambuc {
408*f4a2713aSLionel Sambuc return _mm_set_pi32(__i, __i);
409*f4a2713aSLionel Sambuc }
410*f4a2713aSLionel Sambuc
411*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_set1_pi16(short __w)412*f4a2713aSLionel Sambuc _mm_set1_pi16(short __w)
413*f4a2713aSLionel Sambuc {
414*f4a2713aSLionel Sambuc return _mm_set_pi16(__w, __w, __w, __w);
415*f4a2713aSLionel Sambuc }
416*f4a2713aSLionel Sambuc
417*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_set1_pi8(char __b)418*f4a2713aSLionel Sambuc _mm_set1_pi8(char __b)
419*f4a2713aSLionel Sambuc {
420*f4a2713aSLionel Sambuc return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
421*f4a2713aSLionel Sambuc }
422*f4a2713aSLionel Sambuc
423*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_setr_pi32(int __i0,int __i1)424*f4a2713aSLionel Sambuc _mm_setr_pi32(int __i0, int __i1)
425*f4a2713aSLionel Sambuc {
426*f4a2713aSLionel Sambuc return _mm_set_pi32(__i1, __i0);
427*f4a2713aSLionel Sambuc }
428*f4a2713aSLionel Sambuc
429*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_setr_pi16(short __w0,short __w1,short __w2,short __w3)430*f4a2713aSLionel Sambuc _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
431*f4a2713aSLionel Sambuc {
432*f4a2713aSLionel Sambuc return _mm_set_pi16(__w3, __w2, __w1, __w0);
433*f4a2713aSLionel Sambuc }
434*f4a2713aSLionel Sambuc
435*f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_setr_pi8(char __b0,char __b1,char __b2,char __b3,char __b4,char __b5,char __b6,char __b7)436*f4a2713aSLionel Sambuc _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
437*f4a2713aSLionel Sambuc char __b6, char __b7)
438*f4a2713aSLionel Sambuc {
439*f4a2713aSLionel Sambuc return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
440*f4a2713aSLionel Sambuc }
441*f4a2713aSLionel Sambuc
442*f4a2713aSLionel Sambuc
/* Aliases for compatibility: each _m_<mnemonic> name expands to the
 * corresponding _mm_* intrinsic defined in this header. */
#define _m_empty _mm_empty
#define _m_from_int _mm_cvtsi32_si64
#define _m_to_int _mm_cvtsi64_si32
#define _m_packsswb _mm_packs_pi16
#define _m_packssdw _mm_packs_pi32
#define _m_packuswb _mm_packs_pu16
#define _m_punpckhbw _mm_unpackhi_pi8
#define _m_punpckhwd _mm_unpackhi_pi16
#define _m_punpckhdq _mm_unpackhi_pi32
#define _m_punpcklbw _mm_unpacklo_pi8
#define _m_punpcklwd _mm_unpacklo_pi16
#define _m_punpckldq _mm_unpacklo_pi32
#define _m_paddb _mm_add_pi8
#define _m_paddw _mm_add_pi16
#define _m_paddd _mm_add_pi32
#define _m_paddsb _mm_adds_pi8
#define _m_paddsw _mm_adds_pi16
#define _m_paddusb _mm_adds_pu8
#define _m_paddusw _mm_adds_pu16
#define _m_psubb _mm_sub_pi8
#define _m_psubw _mm_sub_pi16
#define _m_psubd _mm_sub_pi32
#define _m_psubsb _mm_subs_pi8
#define _m_psubsw _mm_subs_pi16
#define _m_psubusb _mm_subs_pu8
#define _m_psubusw _mm_subs_pu16
#define _m_pmaddwd _mm_madd_pi16
#define _m_pmulhw _mm_mulhi_pi16
#define _m_pmullw _mm_mullo_pi16
#define _m_psllw _mm_sll_pi16
#define _m_psllwi _mm_slli_pi16
#define _m_pslld _mm_sll_pi32
#define _m_pslldi _mm_slli_pi32
#define _m_psllq _mm_sll_si64
#define _m_psllqi _mm_slli_si64
#define _m_psraw _mm_sra_pi16
#define _m_psrawi _mm_srai_pi16
#define _m_psrad _mm_sra_pi32
#define _m_psradi _mm_srai_pi32
#define _m_psrlw _mm_srl_pi16
#define _m_psrlwi _mm_srli_pi16
#define _m_psrld _mm_srl_pi32
#define _m_psrldi _mm_srli_pi32
#define _m_psrlq _mm_srl_si64
#define _m_psrlqi _mm_srli_si64
#define _m_pand _mm_and_si64
#define _m_pandn _mm_andnot_si64
#define _m_por _mm_or_si64
#define _m_pxor _mm_xor_si64
#define _m_pcmpeqb _mm_cmpeq_pi8
#define _m_pcmpeqw _mm_cmpeq_pi16
#define _m_pcmpeqd _mm_cmpeq_pi32
#define _m_pcmpgtb _mm_cmpgt_pi8
#define _m_pcmpgtw _mm_cmpgt_pi16
#define _m_pcmpgtd _mm_cmpgt_pi32

#endif /* __MMX__ */

#endif /* __MMINTRIN_H */