xref: /dflybsd-src/contrib/gcc-4.7/gcc/config/i386/mmintrin.h (revision 04febcfb30580676d3e95f58a16c5137ee478b32)
/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
24*e4b17023SJohn Marino 
/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */
27*e4b17023SJohn Marino 
28*e4b17023SJohn Marino #ifndef _MMINTRIN_H_INCLUDED
29*e4b17023SJohn Marino #define _MMINTRIN_H_INCLUDED
30*e4b17023SJohn Marino 
31*e4b17023SJohn Marino #ifndef __MMX__
32*e4b17023SJohn Marino # error "MMX instruction set not enabled"
33*e4b17023SJohn Marino #else
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  For that reason __m64 is
   declared with __may_alias__ in addition to its 8-byte vector size.  */
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));

/* Internal data types for implementing the intrinsics.  Every one of them
   is an 8-byte vector; they differ only in element type (int, short, char,
   long long, float).  */
typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
typedef float __v2sf __attribute__ ((__vector_size__ (8)));
44*e4b17023SJohn Marino 
/* Empty the multimedia state.  Implemented as a single call to the
   emms builtin; takes no arguments and returns nothing.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
}
51*e4b17023SJohn Marino 
/* Alternate name for _mm_empty; simply calls it.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
  _mm_empty ();
}
57*e4b17023SJohn Marino 
58*e4b17023SJohn Marino /* Convert I to a __m64 object.  The integer is zero-extended to 64-bits.  */
59*e4b17023SJohn Marino extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si64(int __i)60*e4b17023SJohn Marino _mm_cvtsi32_si64 (int __i)
61*e4b17023SJohn Marino {
62*e4b17023SJohn Marino   return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
63*e4b17023SJohn Marino }
64*e4b17023SJohn Marino 
65*e4b17023SJohn Marino extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int(int __i)66*e4b17023SJohn Marino _m_from_int (int __i)
67*e4b17023SJohn Marino {
68*e4b17023SJohn Marino   return _mm_cvtsi32_si64 (__i);
69*e4b17023SJohn Marino }
70*e4b17023SJohn Marino 
71*e4b17023SJohn Marino #ifdef __x86_64__
72*e4b17023SJohn Marino /* Convert I to a __m64 object.  */
73*e4b17023SJohn Marino 
74*e4b17023SJohn Marino /* Intel intrinsic.  */
75*e4b17023SJohn Marino extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int64(long long __i)76*e4b17023SJohn Marino _m_from_int64 (long long __i)
77*e4b17023SJohn Marino {
78*e4b17023SJohn Marino   return (__m64) __i;
79*e4b17023SJohn Marino }
80*e4b17023SJohn Marino 
81*e4b17023SJohn Marino extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_m64(long long __i)82*e4b17023SJohn Marino _mm_cvtsi64_m64 (long long __i)
83*e4b17023SJohn Marino {
84*e4b17023SJohn Marino   return (__m64) __i;
85*e4b17023SJohn Marino }
86*e4b17023SJohn Marino 
87*e4b17023SJohn Marino /* Microsoft intrinsic.  */
88*e4b17023SJohn Marino extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si64(long long __i)89*e4b17023SJohn Marino _mm_cvtsi64x_si64 (long long __i)
90*e4b17023SJohn Marino {
91*e4b17023SJohn Marino   return (__m64) __i;
92*e4b17023SJohn Marino }
93*e4b17023SJohn Marino 
94*e4b17023SJohn Marino extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi64x(long long __i)95*e4b17023SJohn Marino _mm_set_pi64x (long long __i)
96*e4b17023SJohn Marino {
97*e4b17023SJohn Marino   return (__m64) __i;
98*e4b17023SJohn Marino }
99*e4b17023SJohn Marino #endif
100*e4b17023SJohn Marino 
101*e4b17023SJohn Marino /* Convert the lower 32 bits of the __m64 object into an integer.  */
102*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si32(__m64 __i)103*e4b17023SJohn Marino _mm_cvtsi64_si32 (__m64 __i)
104*e4b17023SJohn Marino {
105*e4b17023SJohn Marino   return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
106*e4b17023SJohn Marino }
107*e4b17023SJohn Marino 
108*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int(__m64 __i)109*e4b17023SJohn Marino _m_to_int (__m64 __i)
110*e4b17023SJohn Marino {
111*e4b17023SJohn Marino   return _mm_cvtsi64_si32 (__i);
112*e4b17023SJohn Marino }
113*e4b17023SJohn Marino 
114*e4b17023SJohn Marino #ifdef __x86_64__
115*e4b17023SJohn Marino /* Convert the __m64 object to a 64bit integer.  */
116*e4b17023SJohn Marino 
117*e4b17023SJohn Marino /* Intel intrinsic.  */
118*e4b17023SJohn Marino extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int64(__m64 __i)119*e4b17023SJohn Marino _m_to_int64 (__m64 __i)
120*e4b17023SJohn Marino {
121*e4b17023SJohn Marino   return (long long)__i;
122*e4b17023SJohn Marino }
123*e4b17023SJohn Marino 
124*e4b17023SJohn Marino extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtm64_si64(__m64 __i)125*e4b17023SJohn Marino _mm_cvtm64_si64 (__m64 __i)
126*e4b17023SJohn Marino {
127*e4b17023SJohn Marino   return (long long)__i;
128*e4b17023SJohn Marino }
129*e4b17023SJohn Marino 
130*e4b17023SJohn Marino /* Microsoft intrinsic.  */
131*e4b17023SJohn Marino extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si64x(__m64 __i)132*e4b17023SJohn Marino _mm_cvtsi64_si64x (__m64 __i)
133*e4b17023SJohn Marino {
134*e4b17023SJohn Marino   return (long long)__i;
135*e4b17023SJohn Marino }
136*e4b17023SJohn Marino #endif
137*e4b17023SJohn Marino 
138*e4b17023SJohn Marino /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
139*e4b17023SJohn Marino    the result, and the four 16-bit values from M2 into the upper four 8-bit
140*e4b17023SJohn Marino    values of the result, all with signed saturation.  */
141*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi16(__m64 __m1,__m64 __m2)142*e4b17023SJohn Marino _mm_packs_pi16 (__m64 __m1, __m64 __m2)
143*e4b17023SJohn Marino {
144*e4b17023SJohn Marino   return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
145*e4b17023SJohn Marino }
146*e4b17023SJohn Marino 
147*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packsswb(__m64 __m1,__m64 __m2)148*e4b17023SJohn Marino _m_packsswb (__m64 __m1, __m64 __m2)
149*e4b17023SJohn Marino {
150*e4b17023SJohn Marino   return _mm_packs_pi16 (__m1, __m2);
151*e4b17023SJohn Marino }
152*e4b17023SJohn Marino 
153*e4b17023SJohn Marino /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
154*e4b17023SJohn Marino    the result, and the two 32-bit values from M2 into the upper two 16-bit
155*e4b17023SJohn Marino    values of the result, all with signed saturation.  */
156*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi32(__m64 __m1,__m64 __m2)157*e4b17023SJohn Marino _mm_packs_pi32 (__m64 __m1, __m64 __m2)
158*e4b17023SJohn Marino {
159*e4b17023SJohn Marino   return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
160*e4b17023SJohn Marino }
161*e4b17023SJohn Marino 
162*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packssdw(__m64 __m1,__m64 __m2)163*e4b17023SJohn Marino _m_packssdw (__m64 __m1, __m64 __m2)
164*e4b17023SJohn Marino {
165*e4b17023SJohn Marino   return _mm_packs_pi32 (__m1, __m2);
166*e4b17023SJohn Marino }
167*e4b17023SJohn Marino 
168*e4b17023SJohn Marino /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
169*e4b17023SJohn Marino    the result, and the four 16-bit values from M2 into the upper four 8-bit
170*e4b17023SJohn Marino    values of the result, all with unsigned saturation.  */
171*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pu16(__m64 __m1,__m64 __m2)172*e4b17023SJohn Marino _mm_packs_pu16 (__m64 __m1, __m64 __m2)
173*e4b17023SJohn Marino {
174*e4b17023SJohn Marino   return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
175*e4b17023SJohn Marino }
176*e4b17023SJohn Marino 
177*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packuswb(__m64 __m1,__m64 __m2)178*e4b17023SJohn Marino _m_packuswb (__m64 __m1, __m64 __m2)
179*e4b17023SJohn Marino {
180*e4b17023SJohn Marino   return _mm_packs_pu16 (__m1, __m2);
181*e4b17023SJohn Marino }
182*e4b17023SJohn Marino 
183*e4b17023SJohn Marino /* Interleave the four 8-bit values from the high half of M1 with the four
184*e4b17023SJohn Marino    8-bit values from the high half of M2.  */
185*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi8(__m64 __m1,__m64 __m2)186*e4b17023SJohn Marino _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
187*e4b17023SJohn Marino {
188*e4b17023SJohn Marino   return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
189*e4b17023SJohn Marino }
190*e4b17023SJohn Marino 
191*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhbw(__m64 __m1,__m64 __m2)192*e4b17023SJohn Marino _m_punpckhbw (__m64 __m1, __m64 __m2)
193*e4b17023SJohn Marino {
194*e4b17023SJohn Marino   return _mm_unpackhi_pi8 (__m1, __m2);
195*e4b17023SJohn Marino }
196*e4b17023SJohn Marino 
197*e4b17023SJohn Marino /* Interleave the two 16-bit values from the high half of M1 with the two
198*e4b17023SJohn Marino    16-bit values from the high half of M2.  */
199*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi16(__m64 __m1,__m64 __m2)200*e4b17023SJohn Marino _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
201*e4b17023SJohn Marino {
202*e4b17023SJohn Marino   return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
203*e4b17023SJohn Marino }
204*e4b17023SJohn Marino 
205*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhwd(__m64 __m1,__m64 __m2)206*e4b17023SJohn Marino _m_punpckhwd (__m64 __m1, __m64 __m2)
207*e4b17023SJohn Marino {
208*e4b17023SJohn Marino   return _mm_unpackhi_pi16 (__m1, __m2);
209*e4b17023SJohn Marino }
210*e4b17023SJohn Marino 
211*e4b17023SJohn Marino /* Interleave the 32-bit value from the high half of M1 with the 32-bit
212*e4b17023SJohn Marino    value from the high half of M2.  */
213*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi32(__m64 __m1,__m64 __m2)214*e4b17023SJohn Marino _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
215*e4b17023SJohn Marino {
216*e4b17023SJohn Marino   return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
217*e4b17023SJohn Marino }
218*e4b17023SJohn Marino 
219*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhdq(__m64 __m1,__m64 __m2)220*e4b17023SJohn Marino _m_punpckhdq (__m64 __m1, __m64 __m2)
221*e4b17023SJohn Marino {
222*e4b17023SJohn Marino   return _mm_unpackhi_pi32 (__m1, __m2);
223*e4b17023SJohn Marino }
224*e4b17023SJohn Marino 
225*e4b17023SJohn Marino /* Interleave the four 8-bit values from the low half of M1 with the four
226*e4b17023SJohn Marino    8-bit values from the low half of M2.  */
227*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi8(__m64 __m1,__m64 __m2)228*e4b17023SJohn Marino _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
229*e4b17023SJohn Marino {
230*e4b17023SJohn Marino   return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
231*e4b17023SJohn Marino }
232*e4b17023SJohn Marino 
233*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklbw(__m64 __m1,__m64 __m2)234*e4b17023SJohn Marino _m_punpcklbw (__m64 __m1, __m64 __m2)
235*e4b17023SJohn Marino {
236*e4b17023SJohn Marino   return _mm_unpacklo_pi8 (__m1, __m2);
237*e4b17023SJohn Marino }
238*e4b17023SJohn Marino 
239*e4b17023SJohn Marino /* Interleave the two 16-bit values from the low half of M1 with the two
240*e4b17023SJohn Marino    16-bit values from the low half of M2.  */
241*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi16(__m64 __m1,__m64 __m2)242*e4b17023SJohn Marino _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
243*e4b17023SJohn Marino {
244*e4b17023SJohn Marino   return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
245*e4b17023SJohn Marino }
246*e4b17023SJohn Marino 
247*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklwd(__m64 __m1,__m64 __m2)248*e4b17023SJohn Marino _m_punpcklwd (__m64 __m1, __m64 __m2)
249*e4b17023SJohn Marino {
250*e4b17023SJohn Marino   return _mm_unpacklo_pi16 (__m1, __m2);
251*e4b17023SJohn Marino }
252*e4b17023SJohn Marino 
253*e4b17023SJohn Marino /* Interleave the 32-bit value from the low half of M1 with the 32-bit
254*e4b17023SJohn Marino    value from the low half of M2.  */
255*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi32(__m64 __m1,__m64 __m2)256*e4b17023SJohn Marino _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
257*e4b17023SJohn Marino {
258*e4b17023SJohn Marino   return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
259*e4b17023SJohn Marino }
260*e4b17023SJohn Marino 
261*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckldq(__m64 __m1,__m64 __m2)262*e4b17023SJohn Marino _m_punpckldq (__m64 __m1, __m64 __m2)
263*e4b17023SJohn Marino {
264*e4b17023SJohn Marino   return _mm_unpacklo_pi32 (__m1, __m2);
265*e4b17023SJohn Marino }
266*e4b17023SJohn Marino 
267*e4b17023SJohn Marino /* Add the 8-bit values in M1 to the 8-bit values in M2.  */
268*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi8(__m64 __m1,__m64 __m2)269*e4b17023SJohn Marino _mm_add_pi8 (__m64 __m1, __m64 __m2)
270*e4b17023SJohn Marino {
271*e4b17023SJohn Marino   return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
272*e4b17023SJohn Marino }
273*e4b17023SJohn Marino 
274*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddb(__m64 __m1,__m64 __m2)275*e4b17023SJohn Marino _m_paddb (__m64 __m1, __m64 __m2)
276*e4b17023SJohn Marino {
277*e4b17023SJohn Marino   return _mm_add_pi8 (__m1, __m2);
278*e4b17023SJohn Marino }
279*e4b17023SJohn Marino 
280*e4b17023SJohn Marino /* Add the 16-bit values in M1 to the 16-bit values in M2.  */
281*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi16(__m64 __m1,__m64 __m2)282*e4b17023SJohn Marino _mm_add_pi16 (__m64 __m1, __m64 __m2)
283*e4b17023SJohn Marino {
284*e4b17023SJohn Marino   return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
285*e4b17023SJohn Marino }
286*e4b17023SJohn Marino 
287*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddw(__m64 __m1,__m64 __m2)288*e4b17023SJohn Marino _m_paddw (__m64 __m1, __m64 __m2)
289*e4b17023SJohn Marino {
290*e4b17023SJohn Marino   return _mm_add_pi16 (__m1, __m2);
291*e4b17023SJohn Marino }
292*e4b17023SJohn Marino 
293*e4b17023SJohn Marino /* Add the 32-bit values in M1 to the 32-bit values in M2.  */
294*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi32(__m64 __m1,__m64 __m2)295*e4b17023SJohn Marino _mm_add_pi32 (__m64 __m1, __m64 __m2)
296*e4b17023SJohn Marino {
297*e4b17023SJohn Marino   return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
298*e4b17023SJohn Marino }
299*e4b17023SJohn Marino 
300*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddd(__m64 __m1,__m64 __m2)301*e4b17023SJohn Marino _m_paddd (__m64 __m1, __m64 __m2)
302*e4b17023SJohn Marino {
303*e4b17023SJohn Marino   return _mm_add_pi32 (__m1, __m2);
304*e4b17023SJohn Marino }
305*e4b17023SJohn Marino 
306*e4b17023SJohn Marino /* Add the 64-bit values in M1 to the 64-bit values in M2.  */
307*e4b17023SJohn Marino #ifdef __SSE2__
308*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_si64(__m64 __m1,__m64 __m2)309*e4b17023SJohn Marino _mm_add_si64 (__m64 __m1, __m64 __m2)
310*e4b17023SJohn Marino {
311*e4b17023SJohn Marino   return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
312*e4b17023SJohn Marino }
313*e4b17023SJohn Marino #endif
314*e4b17023SJohn Marino 
315*e4b17023SJohn Marino /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
316*e4b17023SJohn Marino    saturated arithmetic.  */
317*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi8(__m64 __m1,__m64 __m2)318*e4b17023SJohn Marino _mm_adds_pi8 (__m64 __m1, __m64 __m2)
319*e4b17023SJohn Marino {
320*e4b17023SJohn Marino   return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
321*e4b17023SJohn Marino }
322*e4b17023SJohn Marino 
323*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsb(__m64 __m1,__m64 __m2)324*e4b17023SJohn Marino _m_paddsb (__m64 __m1, __m64 __m2)
325*e4b17023SJohn Marino {
326*e4b17023SJohn Marino   return _mm_adds_pi8 (__m1, __m2);
327*e4b17023SJohn Marino }
328*e4b17023SJohn Marino 
329*e4b17023SJohn Marino /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
330*e4b17023SJohn Marino    saturated arithmetic.  */
331*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi16(__m64 __m1,__m64 __m2)332*e4b17023SJohn Marino _mm_adds_pi16 (__m64 __m1, __m64 __m2)
333*e4b17023SJohn Marino {
334*e4b17023SJohn Marino   return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
335*e4b17023SJohn Marino }
336*e4b17023SJohn Marino 
337*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsw(__m64 __m1,__m64 __m2)338*e4b17023SJohn Marino _m_paddsw (__m64 __m1, __m64 __m2)
339*e4b17023SJohn Marino {
340*e4b17023SJohn Marino   return _mm_adds_pi16 (__m1, __m2);
341*e4b17023SJohn Marino }
342*e4b17023SJohn Marino 
343*e4b17023SJohn Marino /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
344*e4b17023SJohn Marino    saturated arithmetic.  */
345*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu8(__m64 __m1,__m64 __m2)346*e4b17023SJohn Marino _mm_adds_pu8 (__m64 __m1, __m64 __m2)
347*e4b17023SJohn Marino {
348*e4b17023SJohn Marino   return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
349*e4b17023SJohn Marino }
350*e4b17023SJohn Marino 
351*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusb(__m64 __m1,__m64 __m2)352*e4b17023SJohn Marino _m_paddusb (__m64 __m1, __m64 __m2)
353*e4b17023SJohn Marino {
354*e4b17023SJohn Marino   return _mm_adds_pu8 (__m1, __m2);
355*e4b17023SJohn Marino }
356*e4b17023SJohn Marino 
357*e4b17023SJohn Marino /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
358*e4b17023SJohn Marino    saturated arithmetic.  */
359*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu16(__m64 __m1,__m64 __m2)360*e4b17023SJohn Marino _mm_adds_pu16 (__m64 __m1, __m64 __m2)
361*e4b17023SJohn Marino {
362*e4b17023SJohn Marino   return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
363*e4b17023SJohn Marino }
364*e4b17023SJohn Marino 
365*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusw(__m64 __m1,__m64 __m2)366*e4b17023SJohn Marino _m_paddusw (__m64 __m1, __m64 __m2)
367*e4b17023SJohn Marino {
368*e4b17023SJohn Marino   return _mm_adds_pu16 (__m1, __m2);
369*e4b17023SJohn Marino }
370*e4b17023SJohn Marino 
371*e4b17023SJohn Marino /* Subtract the 8-bit values in M2 from the 8-bit values in M1.  */
372*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi8(__m64 __m1,__m64 __m2)373*e4b17023SJohn Marino _mm_sub_pi8 (__m64 __m1, __m64 __m2)
374*e4b17023SJohn Marino {
375*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
376*e4b17023SJohn Marino }
377*e4b17023SJohn Marino 
378*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubb(__m64 __m1,__m64 __m2)379*e4b17023SJohn Marino _m_psubb (__m64 __m1, __m64 __m2)
380*e4b17023SJohn Marino {
381*e4b17023SJohn Marino   return _mm_sub_pi8 (__m1, __m2);
382*e4b17023SJohn Marino }
383*e4b17023SJohn Marino 
384*e4b17023SJohn Marino /* Subtract the 16-bit values in M2 from the 16-bit values in M1.  */
385*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi16(__m64 __m1,__m64 __m2)386*e4b17023SJohn Marino _mm_sub_pi16 (__m64 __m1, __m64 __m2)
387*e4b17023SJohn Marino {
388*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
389*e4b17023SJohn Marino }
390*e4b17023SJohn Marino 
391*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubw(__m64 __m1,__m64 __m2)392*e4b17023SJohn Marino _m_psubw (__m64 __m1, __m64 __m2)
393*e4b17023SJohn Marino {
394*e4b17023SJohn Marino   return _mm_sub_pi16 (__m1, __m2);
395*e4b17023SJohn Marino }
396*e4b17023SJohn Marino 
397*e4b17023SJohn Marino /* Subtract the 32-bit values in M2 from the 32-bit values in M1.  */
398*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi32(__m64 __m1,__m64 __m2)399*e4b17023SJohn Marino _mm_sub_pi32 (__m64 __m1, __m64 __m2)
400*e4b17023SJohn Marino {
401*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
402*e4b17023SJohn Marino }
403*e4b17023SJohn Marino 
404*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubd(__m64 __m1,__m64 __m2)405*e4b17023SJohn Marino _m_psubd (__m64 __m1, __m64 __m2)
406*e4b17023SJohn Marino {
407*e4b17023SJohn Marino   return _mm_sub_pi32 (__m1, __m2);
408*e4b17023SJohn Marino }
409*e4b17023SJohn Marino 
410*e4b17023SJohn Marino /* Add the 64-bit values in M1 to the 64-bit values in M2.  */
411*e4b17023SJohn Marino #ifdef __SSE2__
412*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_si64(__m64 __m1,__m64 __m2)413*e4b17023SJohn Marino _mm_sub_si64 (__m64 __m1, __m64 __m2)
414*e4b17023SJohn Marino {
415*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
416*e4b17023SJohn Marino }
417*e4b17023SJohn Marino #endif
418*e4b17023SJohn Marino 
419*e4b17023SJohn Marino /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
420*e4b17023SJohn Marino    saturating arithmetic.  */
421*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi8(__m64 __m1,__m64 __m2)422*e4b17023SJohn Marino _mm_subs_pi8 (__m64 __m1, __m64 __m2)
423*e4b17023SJohn Marino {
424*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
425*e4b17023SJohn Marino }
426*e4b17023SJohn Marino 
427*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsb(__m64 __m1,__m64 __m2)428*e4b17023SJohn Marino _m_psubsb (__m64 __m1, __m64 __m2)
429*e4b17023SJohn Marino {
430*e4b17023SJohn Marino   return _mm_subs_pi8 (__m1, __m2);
431*e4b17023SJohn Marino }
432*e4b17023SJohn Marino 
433*e4b17023SJohn Marino /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
434*e4b17023SJohn Marino    signed saturating arithmetic.  */
435*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi16(__m64 __m1,__m64 __m2)436*e4b17023SJohn Marino _mm_subs_pi16 (__m64 __m1, __m64 __m2)
437*e4b17023SJohn Marino {
438*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
439*e4b17023SJohn Marino }
440*e4b17023SJohn Marino 
441*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsw(__m64 __m1,__m64 __m2)442*e4b17023SJohn Marino _m_psubsw (__m64 __m1, __m64 __m2)
443*e4b17023SJohn Marino {
444*e4b17023SJohn Marino   return _mm_subs_pi16 (__m1, __m2);
445*e4b17023SJohn Marino }
446*e4b17023SJohn Marino 
447*e4b17023SJohn Marino /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
448*e4b17023SJohn Marino    unsigned saturating arithmetic.  */
449*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu8(__m64 __m1,__m64 __m2)450*e4b17023SJohn Marino _mm_subs_pu8 (__m64 __m1, __m64 __m2)
451*e4b17023SJohn Marino {
452*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
453*e4b17023SJohn Marino }
454*e4b17023SJohn Marino 
455*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusb(__m64 __m1,__m64 __m2)456*e4b17023SJohn Marino _m_psubusb (__m64 __m1, __m64 __m2)
457*e4b17023SJohn Marino {
458*e4b17023SJohn Marino   return _mm_subs_pu8 (__m1, __m2);
459*e4b17023SJohn Marino }
460*e4b17023SJohn Marino 
461*e4b17023SJohn Marino /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
462*e4b17023SJohn Marino    unsigned saturating arithmetic.  */
463*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu16(__m64 __m1,__m64 __m2)464*e4b17023SJohn Marino _mm_subs_pu16 (__m64 __m1, __m64 __m2)
465*e4b17023SJohn Marino {
466*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
467*e4b17023SJohn Marino }
468*e4b17023SJohn Marino 
469*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusw(__m64 __m1,__m64 __m2)470*e4b17023SJohn Marino _m_psubusw (__m64 __m1, __m64 __m2)
471*e4b17023SJohn Marino {
472*e4b17023SJohn Marino   return _mm_subs_pu16 (__m1, __m2);
473*e4b17023SJohn Marino }
474*e4b17023SJohn Marino 
475*e4b17023SJohn Marino /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
476*e4b17023SJohn Marino    four 32-bit intermediate results, which are then summed by pairs to
477*e4b17023SJohn Marino    produce two 32-bit results.  */
478*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_pi16(__m64 __m1,__m64 __m2)479*e4b17023SJohn Marino _mm_madd_pi16 (__m64 __m1, __m64 __m2)
480*e4b17023SJohn Marino {
481*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
482*e4b17023SJohn Marino }
483*e4b17023SJohn Marino 
484*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaddwd(__m64 __m1,__m64 __m2)485*e4b17023SJohn Marino _m_pmaddwd (__m64 __m1, __m64 __m2)
486*e4b17023SJohn Marino {
487*e4b17023SJohn Marino   return _mm_madd_pi16 (__m1, __m2);
488*e4b17023SJohn Marino }
489*e4b17023SJohn Marino 
490*e4b17023SJohn Marino /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
491*e4b17023SJohn Marino    M2 and produce the high 16 bits of the 32-bit results.  */
492*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pi16(__m64 __m1,__m64 __m2)493*e4b17023SJohn Marino _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
494*e4b17023SJohn Marino {
495*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
496*e4b17023SJohn Marino }
497*e4b17023SJohn Marino 
498*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhw(__m64 __m1,__m64 __m2)499*e4b17023SJohn Marino _m_pmulhw (__m64 __m1, __m64 __m2)
500*e4b17023SJohn Marino {
501*e4b17023SJohn Marino   return _mm_mulhi_pi16 (__m1, __m2);
502*e4b17023SJohn Marino }
503*e4b17023SJohn Marino 
504*e4b17023SJohn Marino /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
505*e4b17023SJohn Marino    the low 16 bits of the results.  */
506*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_pi16(__m64 __m1,__m64 __m2)507*e4b17023SJohn Marino _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
508*e4b17023SJohn Marino {
509*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
510*e4b17023SJohn Marino }
511*e4b17023SJohn Marino 
512*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmullw(__m64 __m1,__m64 __m2)513*e4b17023SJohn Marino _m_pmullw (__m64 __m1, __m64 __m2)
514*e4b17023SJohn Marino {
515*e4b17023SJohn Marino   return _mm_mullo_pi16 (__m1, __m2);
516*e4b17023SJohn Marino }
517*e4b17023SJohn Marino 
518*e4b17023SJohn Marino /* Shift four 16-bit values in M left by COUNT.  */
519*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi16(__m64 __m,__m64 __count)520*e4b17023SJohn Marino _mm_sll_pi16 (__m64 __m, __m64 __count)
521*e4b17023SJohn Marino {
522*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
523*e4b17023SJohn Marino }
524*e4b17023SJohn Marino 
525*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllw(__m64 __m,__m64 __count)526*e4b17023SJohn Marino _m_psllw (__m64 __m, __m64 __count)
527*e4b17023SJohn Marino {
528*e4b17023SJohn Marino   return _mm_sll_pi16 (__m, __count);
529*e4b17023SJohn Marino }
530*e4b17023SJohn Marino 
531*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi16(__m64 __m,int __count)532*e4b17023SJohn Marino _mm_slli_pi16 (__m64 __m, int __count)
533*e4b17023SJohn Marino {
534*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
535*e4b17023SJohn Marino }
536*e4b17023SJohn Marino 
537*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllwi(__m64 __m,int __count)538*e4b17023SJohn Marino _m_psllwi (__m64 __m, int __count)
539*e4b17023SJohn Marino {
540*e4b17023SJohn Marino   return _mm_slli_pi16 (__m, __count);
541*e4b17023SJohn Marino }
542*e4b17023SJohn Marino 
543*e4b17023SJohn Marino /* Shift two 32-bit values in M left by COUNT.  */
544*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi32(__m64 __m,__m64 __count)545*e4b17023SJohn Marino _mm_sll_pi32 (__m64 __m, __m64 __count)
546*e4b17023SJohn Marino {
547*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
548*e4b17023SJohn Marino }
549*e4b17023SJohn Marino 
550*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslld(__m64 __m,__m64 __count)551*e4b17023SJohn Marino _m_pslld (__m64 __m, __m64 __count)
552*e4b17023SJohn Marino {
553*e4b17023SJohn Marino   return _mm_sll_pi32 (__m, __count);
554*e4b17023SJohn Marino }
555*e4b17023SJohn Marino 
556*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi32(__m64 __m,int __count)557*e4b17023SJohn Marino _mm_slli_pi32 (__m64 __m, int __count)
558*e4b17023SJohn Marino {
559*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
560*e4b17023SJohn Marino }
561*e4b17023SJohn Marino 
562*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslldi(__m64 __m,int __count)563*e4b17023SJohn Marino _m_pslldi (__m64 __m, int __count)
564*e4b17023SJohn Marino {
565*e4b17023SJohn Marino   return _mm_slli_pi32 (__m, __count);
566*e4b17023SJohn Marino }
567*e4b17023SJohn Marino 
568*e4b17023SJohn Marino /* Shift the 64-bit value in M left by COUNT.  */
569*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_si64(__m64 __m,__m64 __count)570*e4b17023SJohn Marino _mm_sll_si64 (__m64 __m, __m64 __count)
571*e4b17023SJohn Marino {
572*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
573*e4b17023SJohn Marino }
574*e4b17023SJohn Marino 
575*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllq(__m64 __m,__m64 __count)576*e4b17023SJohn Marino _m_psllq (__m64 __m, __m64 __count)
577*e4b17023SJohn Marino {
578*e4b17023SJohn Marino   return _mm_sll_si64 (__m, __count);
579*e4b17023SJohn Marino }
580*e4b17023SJohn Marino 
581*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si64(__m64 __m,int __count)582*e4b17023SJohn Marino _mm_slli_si64 (__m64 __m, int __count)
583*e4b17023SJohn Marino {
584*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
585*e4b17023SJohn Marino }
586*e4b17023SJohn Marino 
587*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllqi(__m64 __m,int __count)588*e4b17023SJohn Marino _m_psllqi (__m64 __m, int __count)
589*e4b17023SJohn Marino {
590*e4b17023SJohn Marino   return _mm_slli_si64 (__m, __count);
591*e4b17023SJohn Marino }
592*e4b17023SJohn Marino 
593*e4b17023SJohn Marino /* Shift four 16-bit values in M right by COUNT; shift in the sign bit.  */
594*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi16(__m64 __m,__m64 __count)595*e4b17023SJohn Marino _mm_sra_pi16 (__m64 __m, __m64 __count)
596*e4b17023SJohn Marino {
597*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
598*e4b17023SJohn Marino }
599*e4b17023SJohn Marino 
600*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psraw(__m64 __m,__m64 __count)601*e4b17023SJohn Marino _m_psraw (__m64 __m, __m64 __count)
602*e4b17023SJohn Marino {
603*e4b17023SJohn Marino   return _mm_sra_pi16 (__m, __count);
604*e4b17023SJohn Marino }
605*e4b17023SJohn Marino 
606*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi16(__m64 __m,int __count)607*e4b17023SJohn Marino _mm_srai_pi16 (__m64 __m, int __count)
608*e4b17023SJohn Marino {
609*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
610*e4b17023SJohn Marino }
611*e4b17023SJohn Marino 
612*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrawi(__m64 __m,int __count)613*e4b17023SJohn Marino _m_psrawi (__m64 __m, int __count)
614*e4b17023SJohn Marino {
615*e4b17023SJohn Marino   return _mm_srai_pi16 (__m, __count);
616*e4b17023SJohn Marino }
617*e4b17023SJohn Marino 
618*e4b17023SJohn Marino /* Shift two 32-bit values in M right by COUNT; shift in the sign bit.  */
619*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi32(__m64 __m,__m64 __count)620*e4b17023SJohn Marino _mm_sra_pi32 (__m64 __m, __m64 __count)
621*e4b17023SJohn Marino {
622*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
623*e4b17023SJohn Marino }
624*e4b17023SJohn Marino 
625*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrad(__m64 __m,__m64 __count)626*e4b17023SJohn Marino _m_psrad (__m64 __m, __m64 __count)
627*e4b17023SJohn Marino {
628*e4b17023SJohn Marino   return _mm_sra_pi32 (__m, __count);
629*e4b17023SJohn Marino }
630*e4b17023SJohn Marino 
631*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi32(__m64 __m,int __count)632*e4b17023SJohn Marino _mm_srai_pi32 (__m64 __m, int __count)
633*e4b17023SJohn Marino {
634*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
635*e4b17023SJohn Marino }
636*e4b17023SJohn Marino 
637*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psradi(__m64 __m,int __count)638*e4b17023SJohn Marino _m_psradi (__m64 __m, int __count)
639*e4b17023SJohn Marino {
640*e4b17023SJohn Marino   return _mm_srai_pi32 (__m, __count);
641*e4b17023SJohn Marino }
642*e4b17023SJohn Marino 
643*e4b17023SJohn Marino /* Shift four 16-bit values in M right by COUNT; shift in zeros.  */
644*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi16(__m64 __m,__m64 __count)645*e4b17023SJohn Marino _mm_srl_pi16 (__m64 __m, __m64 __count)
646*e4b17023SJohn Marino {
647*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
648*e4b17023SJohn Marino }
649*e4b17023SJohn Marino 
650*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlw(__m64 __m,__m64 __count)651*e4b17023SJohn Marino _m_psrlw (__m64 __m, __m64 __count)
652*e4b17023SJohn Marino {
653*e4b17023SJohn Marino   return _mm_srl_pi16 (__m, __count);
654*e4b17023SJohn Marino }
655*e4b17023SJohn Marino 
656*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi16(__m64 __m,int __count)657*e4b17023SJohn Marino _mm_srli_pi16 (__m64 __m, int __count)
658*e4b17023SJohn Marino {
659*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
660*e4b17023SJohn Marino }
661*e4b17023SJohn Marino 
662*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlwi(__m64 __m,int __count)663*e4b17023SJohn Marino _m_psrlwi (__m64 __m, int __count)
664*e4b17023SJohn Marino {
665*e4b17023SJohn Marino   return _mm_srli_pi16 (__m, __count);
666*e4b17023SJohn Marino }
667*e4b17023SJohn Marino 
668*e4b17023SJohn Marino /* Shift two 32-bit values in M right by COUNT; shift in zeros.  */
669*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi32(__m64 __m,__m64 __count)670*e4b17023SJohn Marino _mm_srl_pi32 (__m64 __m, __m64 __count)
671*e4b17023SJohn Marino {
672*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
673*e4b17023SJohn Marino }
674*e4b17023SJohn Marino 
675*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrld(__m64 __m,__m64 __count)676*e4b17023SJohn Marino _m_psrld (__m64 __m, __m64 __count)
677*e4b17023SJohn Marino {
678*e4b17023SJohn Marino   return _mm_srl_pi32 (__m, __count);
679*e4b17023SJohn Marino }
680*e4b17023SJohn Marino 
681*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi32(__m64 __m,int __count)682*e4b17023SJohn Marino _mm_srli_pi32 (__m64 __m, int __count)
683*e4b17023SJohn Marino {
684*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
685*e4b17023SJohn Marino }
686*e4b17023SJohn Marino 
687*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrldi(__m64 __m,int __count)688*e4b17023SJohn Marino _m_psrldi (__m64 __m, int __count)
689*e4b17023SJohn Marino {
690*e4b17023SJohn Marino   return _mm_srli_pi32 (__m, __count);
691*e4b17023SJohn Marino }
692*e4b17023SJohn Marino 
693*e4b17023SJohn Marino /* Shift the 64-bit value in M left by COUNT; shift in zeros.  */
694*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_si64(__m64 __m,__m64 __count)695*e4b17023SJohn Marino _mm_srl_si64 (__m64 __m, __m64 __count)
696*e4b17023SJohn Marino {
697*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
698*e4b17023SJohn Marino }
699*e4b17023SJohn Marino 
700*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlq(__m64 __m,__m64 __count)701*e4b17023SJohn Marino _m_psrlq (__m64 __m, __m64 __count)
702*e4b17023SJohn Marino {
703*e4b17023SJohn Marino   return _mm_srl_si64 (__m, __count);
704*e4b17023SJohn Marino }
705*e4b17023SJohn Marino 
706*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si64(__m64 __m,int __count)707*e4b17023SJohn Marino _mm_srli_si64 (__m64 __m, int __count)
708*e4b17023SJohn Marino {
709*e4b17023SJohn Marino   return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
710*e4b17023SJohn Marino }
711*e4b17023SJohn Marino 
712*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlqi(__m64 __m,int __count)713*e4b17023SJohn Marino _m_psrlqi (__m64 __m, int __count)
714*e4b17023SJohn Marino {
715*e4b17023SJohn Marino   return _mm_srli_si64 (__m, __count);
716*e4b17023SJohn Marino }
717*e4b17023SJohn Marino 
718*e4b17023SJohn Marino /* Bit-wise AND the 64-bit values in M1 and M2.  */
719*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si64(__m64 __m1,__m64 __m2)720*e4b17023SJohn Marino _mm_and_si64 (__m64 __m1, __m64 __m2)
721*e4b17023SJohn Marino {
722*e4b17023SJohn Marino   return __builtin_ia32_pand (__m1, __m2);
723*e4b17023SJohn Marino }
724*e4b17023SJohn Marino 
725*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pand(__m64 __m1,__m64 __m2)726*e4b17023SJohn Marino _m_pand (__m64 __m1, __m64 __m2)
727*e4b17023SJohn Marino {
728*e4b17023SJohn Marino   return _mm_and_si64 (__m1, __m2);
729*e4b17023SJohn Marino }
730*e4b17023SJohn Marino 
731*e4b17023SJohn Marino /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
732*e4b17023SJohn Marino    64-bit value in M2.  */
733*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si64(__m64 __m1,__m64 __m2)734*e4b17023SJohn Marino _mm_andnot_si64 (__m64 __m1, __m64 __m2)
735*e4b17023SJohn Marino {
736*e4b17023SJohn Marino   return __builtin_ia32_pandn (__m1, __m2);
737*e4b17023SJohn Marino }
738*e4b17023SJohn Marino 
739*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pandn(__m64 __m1,__m64 __m2)740*e4b17023SJohn Marino _m_pandn (__m64 __m1, __m64 __m2)
741*e4b17023SJohn Marino {
742*e4b17023SJohn Marino   return _mm_andnot_si64 (__m1, __m2);
743*e4b17023SJohn Marino }
744*e4b17023SJohn Marino 
745*e4b17023SJohn Marino /* Bit-wise inclusive OR the 64-bit values in M1 and M2.  */
746*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si64(__m64 __m1,__m64 __m2)747*e4b17023SJohn Marino _mm_or_si64 (__m64 __m1, __m64 __m2)
748*e4b17023SJohn Marino {
749*e4b17023SJohn Marino   return __builtin_ia32_por (__m1, __m2);
750*e4b17023SJohn Marino }
751*e4b17023SJohn Marino 
752*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_por(__m64 __m1,__m64 __m2)753*e4b17023SJohn Marino _m_por (__m64 __m1, __m64 __m2)
754*e4b17023SJohn Marino {
755*e4b17023SJohn Marino   return _mm_or_si64 (__m1, __m2);
756*e4b17023SJohn Marino }
757*e4b17023SJohn Marino 
758*e4b17023SJohn Marino /* Bit-wise exclusive OR the 64-bit values in M1 and M2.  */
759*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si64(__m64 __m1,__m64 __m2)760*e4b17023SJohn Marino _mm_xor_si64 (__m64 __m1, __m64 __m2)
761*e4b17023SJohn Marino {
762*e4b17023SJohn Marino   return __builtin_ia32_pxor (__m1, __m2);
763*e4b17023SJohn Marino }
764*e4b17023SJohn Marino 
765*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pxor(__m64 __m1,__m64 __m2)766*e4b17023SJohn Marino _m_pxor (__m64 __m1, __m64 __m2)
767*e4b17023SJohn Marino {
768*e4b17023SJohn Marino   return _mm_xor_si64 (__m1, __m2);
769*e4b17023SJohn Marino }
770*e4b17023SJohn Marino 
771*e4b17023SJohn Marino /* Compare eight 8-bit values.  The result of the comparison is 0xFF if the
772*e4b17023SJohn Marino    test is true and zero if false.  */
773*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi8(__m64 __m1,__m64 __m2)774*e4b17023SJohn Marino _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
775*e4b17023SJohn Marino {
776*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
777*e4b17023SJohn Marino }
778*e4b17023SJohn Marino 
779*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqb(__m64 __m1,__m64 __m2)780*e4b17023SJohn Marino _m_pcmpeqb (__m64 __m1, __m64 __m2)
781*e4b17023SJohn Marino {
782*e4b17023SJohn Marino   return _mm_cmpeq_pi8 (__m1, __m2);
783*e4b17023SJohn Marino }
784*e4b17023SJohn Marino 
785*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi8(__m64 __m1,__m64 __m2)786*e4b17023SJohn Marino _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
787*e4b17023SJohn Marino {
788*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
789*e4b17023SJohn Marino }
790*e4b17023SJohn Marino 
791*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtb(__m64 __m1,__m64 __m2)792*e4b17023SJohn Marino _m_pcmpgtb (__m64 __m1, __m64 __m2)
793*e4b17023SJohn Marino {
794*e4b17023SJohn Marino   return _mm_cmpgt_pi8 (__m1, __m2);
795*e4b17023SJohn Marino }
796*e4b17023SJohn Marino 
797*e4b17023SJohn Marino /* Compare four 16-bit values.  The result of the comparison is 0xFFFF if
798*e4b17023SJohn Marino    the test is true and zero if false.  */
799*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi16(__m64 __m1,__m64 __m2)800*e4b17023SJohn Marino _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
801*e4b17023SJohn Marino {
802*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
803*e4b17023SJohn Marino }
804*e4b17023SJohn Marino 
805*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqw(__m64 __m1,__m64 __m2)806*e4b17023SJohn Marino _m_pcmpeqw (__m64 __m1, __m64 __m2)
807*e4b17023SJohn Marino {
808*e4b17023SJohn Marino   return _mm_cmpeq_pi16 (__m1, __m2);
809*e4b17023SJohn Marino }
810*e4b17023SJohn Marino 
811*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi16(__m64 __m1,__m64 __m2)812*e4b17023SJohn Marino _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
813*e4b17023SJohn Marino {
814*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
815*e4b17023SJohn Marino }
816*e4b17023SJohn Marino 
817*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtw(__m64 __m1,__m64 __m2)818*e4b17023SJohn Marino _m_pcmpgtw (__m64 __m1, __m64 __m2)
819*e4b17023SJohn Marino {
820*e4b17023SJohn Marino   return _mm_cmpgt_pi16 (__m1, __m2);
821*e4b17023SJohn Marino }
822*e4b17023SJohn Marino 
823*e4b17023SJohn Marino /* Compare two 32-bit values.  The result of the comparison is 0xFFFFFFFF if
824*e4b17023SJohn Marino    the test is true and zero if false.  */
825*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi32(__m64 __m1,__m64 __m2)826*e4b17023SJohn Marino _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
827*e4b17023SJohn Marino {
828*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
829*e4b17023SJohn Marino }
830*e4b17023SJohn Marino 
831*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqd(__m64 __m1,__m64 __m2)832*e4b17023SJohn Marino _m_pcmpeqd (__m64 __m1, __m64 __m2)
833*e4b17023SJohn Marino {
834*e4b17023SJohn Marino   return _mm_cmpeq_pi32 (__m1, __m2);
835*e4b17023SJohn Marino }
836*e4b17023SJohn Marino 
837*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi32(__m64 __m1,__m64 __m2)838*e4b17023SJohn Marino _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
839*e4b17023SJohn Marino {
840*e4b17023SJohn Marino   return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
841*e4b17023SJohn Marino }
842*e4b17023SJohn Marino 
843*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtd(__m64 __m1,__m64 __m2)844*e4b17023SJohn Marino _m_pcmpgtd (__m64 __m1, __m64 __m2)
845*e4b17023SJohn Marino {
846*e4b17023SJohn Marino   return _mm_cmpgt_pi32 (__m1, __m2);
847*e4b17023SJohn Marino }
848*e4b17023SJohn Marino 
849*e4b17023SJohn Marino /* Creates a 64-bit zero.  */
850*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si64(void)851*e4b17023SJohn Marino _mm_setzero_si64 (void)
852*e4b17023SJohn Marino {
853*e4b17023SJohn Marino   return (__m64)0LL;
854*e4b17023SJohn Marino }
855*e4b17023SJohn Marino 
856*e4b17023SJohn Marino /* Creates a vector of two 32-bit values; I0 is least significant.  */
857*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi32(int __i1,int __i0)858*e4b17023SJohn Marino _mm_set_pi32 (int __i1, int __i0)
859*e4b17023SJohn Marino {
860*e4b17023SJohn Marino   return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
861*e4b17023SJohn Marino }
862*e4b17023SJohn Marino 
863*e4b17023SJohn Marino /* Creates a vector of four 16-bit values; W0 is least significant.  */
864*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi16(short __w3,short __w2,short __w1,short __w0)865*e4b17023SJohn Marino _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
866*e4b17023SJohn Marino {
867*e4b17023SJohn Marino   return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
868*e4b17023SJohn Marino }
869*e4b17023SJohn Marino 
870*e4b17023SJohn Marino /* Creates a vector of eight 8-bit values; B0 is least significant.  */
871*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi8(char __b7,char __b6,char __b5,char __b4,char __b3,char __b2,char __b1,char __b0)872*e4b17023SJohn Marino _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
873*e4b17023SJohn Marino 	     char __b3, char __b2, char __b1, char __b0)
874*e4b17023SJohn Marino {
875*e4b17023SJohn Marino   return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
876*e4b17023SJohn Marino 					       __b4, __b5, __b6, __b7);
877*e4b17023SJohn Marino }
878*e4b17023SJohn Marino 
879*e4b17023SJohn Marino /* Similar, but with the arguments in reverse order.  */
880*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi32(int __i0,int __i1)881*e4b17023SJohn Marino _mm_setr_pi32 (int __i0, int __i1)
882*e4b17023SJohn Marino {
883*e4b17023SJohn Marino   return _mm_set_pi32 (__i1, __i0);
884*e4b17023SJohn Marino }
885*e4b17023SJohn Marino 
886*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi16(short __w0,short __w1,short __w2,short __w3)887*e4b17023SJohn Marino _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
888*e4b17023SJohn Marino {
889*e4b17023SJohn Marino   return _mm_set_pi16 (__w3, __w2, __w1, __w0);
890*e4b17023SJohn Marino }
891*e4b17023SJohn Marino 
892*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi8(char __b0,char __b1,char __b2,char __b3,char __b4,char __b5,char __b6,char __b7)893*e4b17023SJohn Marino _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
894*e4b17023SJohn Marino 	      char __b4, char __b5, char __b6, char __b7)
895*e4b17023SJohn Marino {
896*e4b17023SJohn Marino   return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
897*e4b17023SJohn Marino }
898*e4b17023SJohn Marino 
899*e4b17023SJohn Marino /* Creates a vector of two 32-bit values, both elements containing I.  */
900*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi32(int __i)901*e4b17023SJohn Marino _mm_set1_pi32 (int __i)
902*e4b17023SJohn Marino {
903*e4b17023SJohn Marino   return _mm_set_pi32 (__i, __i);
904*e4b17023SJohn Marino }
905*e4b17023SJohn Marino 
906*e4b17023SJohn Marino /* Creates a vector of four 16-bit values, all elements containing W.  */
907*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi16(short __w)908*e4b17023SJohn Marino _mm_set1_pi16 (short __w)
909*e4b17023SJohn Marino {
910*e4b17023SJohn Marino   return _mm_set_pi16 (__w, __w, __w, __w);
911*e4b17023SJohn Marino }
912*e4b17023SJohn Marino 
913*e4b17023SJohn Marino /* Creates a vector of eight 8-bit values, all elements containing B.  */
914*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi8(char __b)915*e4b17023SJohn Marino _mm_set1_pi8 (char __b)
916*e4b17023SJohn Marino {
917*e4b17023SJohn Marino   return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
918*e4b17023SJohn Marino }
919*e4b17023SJohn Marino 
920*e4b17023SJohn Marino #endif /* __MMX__ */
921*e4b17023SJohn Marino #endif /* _MMINTRIN_H_INCLUDED */
922