/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

28*e4b17023SJohn Marino #ifndef _EMMINTRIN_H_INCLUDED
29*e4b17023SJohn Marino #define _EMMINTRIN_H_INCLUDED
30*e4b17023SJohn Marino
31*e4b17023SJohn Marino #ifndef __SSE2__
32*e4b17023SJohn Marino # error "SSE2 instruction set not enabled"
33*e4b17023SJohn Marino #else
34*e4b17023SJohn Marino
35*e4b17023SJohn Marino /* We need definitions from the SSE header files*/
36*e4b17023SJohn Marino #include <xmmintrin.h>
37*e4b17023SJohn Marino
/* SSE2 */
/* Internal 16-byte vector types, one per element width.  The intrinsics
   below cast their operands to these before calling the builtins.  */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__ ((__vector_size__ (16)));
typedef char __v16qi __attribute__ ((__vector_size__ (16)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));

/* Create a selector for use with the SHUFPD instruction.  FP0 becomes
   bit 0 of the selector and FP1 becomes bit 1.  */
#define _MM_SHUFFLE2(fp1,fp0) \
 (((fp1) << 1) | (fp0))

54*e4b17023SJohn Marino /* Create a vector with element 0 as F and the rest zero. */
55*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_sd(double __F)56*e4b17023SJohn Marino _mm_set_sd (double __F)
57*e4b17023SJohn Marino {
58*e4b17023SJohn Marino return __extension__ (__m128d){ __F, 0.0 };
59*e4b17023SJohn Marino }
60*e4b17023SJohn Marino
61*e4b17023SJohn Marino /* Create a vector with both elements equal to F. */
62*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pd(double __F)63*e4b17023SJohn Marino _mm_set1_pd (double __F)
64*e4b17023SJohn Marino {
65*e4b17023SJohn Marino return __extension__ (__m128d){ __F, __F };
66*e4b17023SJohn Marino }
67*e4b17023SJohn Marino
68*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pd1(double __F)69*e4b17023SJohn Marino _mm_set_pd1 (double __F)
70*e4b17023SJohn Marino {
71*e4b17023SJohn Marino return _mm_set1_pd (__F);
72*e4b17023SJohn Marino }
73*e4b17023SJohn Marino
74*e4b17023SJohn Marino /* Create a vector with the lower value X and upper value W. */
75*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pd(double __W,double __X)76*e4b17023SJohn Marino _mm_set_pd (double __W, double __X)
77*e4b17023SJohn Marino {
78*e4b17023SJohn Marino return __extension__ (__m128d){ __X, __W };
79*e4b17023SJohn Marino }
80*e4b17023SJohn Marino
81*e4b17023SJohn Marino /* Create a vector with the lower value W and upper value X. */
82*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pd(double __W,double __X)83*e4b17023SJohn Marino _mm_setr_pd (double __W, double __X)
84*e4b17023SJohn Marino {
85*e4b17023SJohn Marino return __extension__ (__m128d){ __W, __X };
86*e4b17023SJohn Marino }
87*e4b17023SJohn Marino
88*e4b17023SJohn Marino /* Create a vector of zeros. */
89*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_pd(void)90*e4b17023SJohn Marino _mm_setzero_pd (void)
91*e4b17023SJohn Marino {
92*e4b17023SJohn Marino return __extension__ (__m128d){ 0.0, 0.0 };
93*e4b17023SJohn Marino }
94*e4b17023SJohn Marino
95*e4b17023SJohn Marino /* Sets the low DPFP value of A from the low value of B. */
96*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_sd(__m128d __A,__m128d __B)97*e4b17023SJohn Marino _mm_move_sd (__m128d __A, __m128d __B)
98*e4b17023SJohn Marino {
99*e4b17023SJohn Marino return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
100*e4b17023SJohn Marino }
101*e4b17023SJohn Marino
102*e4b17023SJohn Marino /* Load two DPFP values from P. The address must be 16-byte aligned. */
103*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_pd(double const * __P)104*e4b17023SJohn Marino _mm_load_pd (double const *__P)
105*e4b17023SJohn Marino {
106*e4b17023SJohn Marino return *(__m128d *)__P;
107*e4b17023SJohn Marino }
108*e4b17023SJohn Marino
109*e4b17023SJohn Marino /* Load two DPFP values from P. The address need not be 16-byte aligned. */
110*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_pd(double const * __P)111*e4b17023SJohn Marino _mm_loadu_pd (double const *__P)
112*e4b17023SJohn Marino {
113*e4b17023SJohn Marino return __builtin_ia32_loadupd (__P);
114*e4b17023SJohn Marino }
115*e4b17023SJohn Marino
116*e4b17023SJohn Marino /* Create a vector with all two elements equal to *P. */
117*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load1_pd(double const * __P)118*e4b17023SJohn Marino _mm_load1_pd (double const *__P)
119*e4b17023SJohn Marino {
120*e4b17023SJohn Marino return _mm_set1_pd (*__P);
121*e4b17023SJohn Marino }
122*e4b17023SJohn Marino
123*e4b17023SJohn Marino /* Create a vector with element 0 as *P and the rest zero. */
124*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sd(double const * __P)125*e4b17023SJohn Marino _mm_load_sd (double const *__P)
126*e4b17023SJohn Marino {
127*e4b17023SJohn Marino return _mm_set_sd (*__P);
128*e4b17023SJohn Marino }
129*e4b17023SJohn Marino
130*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_pd1(double const * __P)131*e4b17023SJohn Marino _mm_load_pd1 (double const *__P)
132*e4b17023SJohn Marino {
133*e4b17023SJohn Marino return _mm_load1_pd (__P);
134*e4b17023SJohn Marino }
135*e4b17023SJohn Marino
136*e4b17023SJohn Marino /* Load two DPFP values in reverse order. The address must be aligned. */
137*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadr_pd(double const * __P)138*e4b17023SJohn Marino _mm_loadr_pd (double const *__P)
139*e4b17023SJohn Marino {
140*e4b17023SJohn Marino __m128d __tmp = _mm_load_pd (__P);
141*e4b17023SJohn Marino return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
142*e4b17023SJohn Marino }
143*e4b17023SJohn Marino
144*e4b17023SJohn Marino /* Store two DPFP values. The address must be 16-byte aligned. */
145*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_pd(double * __P,__m128d __A)146*e4b17023SJohn Marino _mm_store_pd (double *__P, __m128d __A)
147*e4b17023SJohn Marino {
148*e4b17023SJohn Marino *(__m128d *)__P = __A;
149*e4b17023SJohn Marino }
150*e4b17023SJohn Marino
151*e4b17023SJohn Marino /* Store two DPFP values. The address need not be 16-byte aligned. */
152*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_pd(double * __P,__m128d __A)153*e4b17023SJohn Marino _mm_storeu_pd (double *__P, __m128d __A)
154*e4b17023SJohn Marino {
155*e4b17023SJohn Marino __builtin_ia32_storeupd (__P, __A);
156*e4b17023SJohn Marino }
157*e4b17023SJohn Marino
158*e4b17023SJohn Marino /* Stores the lower DPFP value. */
159*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sd(double * __P,__m128d __A)160*e4b17023SJohn Marino _mm_store_sd (double *__P, __m128d __A)
161*e4b17023SJohn Marino {
162*e4b17023SJohn Marino *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
163*e4b17023SJohn Marino }
164*e4b17023SJohn Marino
165*e4b17023SJohn Marino extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_f64(__m128d __A)166*e4b17023SJohn Marino _mm_cvtsd_f64 (__m128d __A)
167*e4b17023SJohn Marino {
168*e4b17023SJohn Marino return __builtin_ia32_vec_ext_v2df (__A, 0);
169*e4b17023SJohn Marino }
170*e4b17023SJohn Marino
171*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pd(double * __P,__m128d __A)172*e4b17023SJohn Marino _mm_storel_pd (double *__P, __m128d __A)
173*e4b17023SJohn Marino {
174*e4b17023SJohn Marino _mm_store_sd (__P, __A);
175*e4b17023SJohn Marino }
176*e4b17023SJohn Marino
177*e4b17023SJohn Marino /* Stores the upper DPFP value. */
178*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeh_pd(double * __P,__m128d __A)179*e4b17023SJohn Marino _mm_storeh_pd (double *__P, __m128d __A)
180*e4b17023SJohn Marino {
181*e4b17023SJohn Marino *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
182*e4b17023SJohn Marino }
183*e4b17023SJohn Marino
184*e4b17023SJohn Marino /* Store the lower DPFP value across two words.
185*e4b17023SJohn Marino The address must be 16-byte aligned. */
186*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store1_pd(double * __P,__m128d __A)187*e4b17023SJohn Marino _mm_store1_pd (double *__P, __m128d __A)
188*e4b17023SJohn Marino {
189*e4b17023SJohn Marino _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
190*e4b17023SJohn Marino }
191*e4b17023SJohn Marino
192*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_pd1(double * __P,__m128d __A)193*e4b17023SJohn Marino _mm_store_pd1 (double *__P, __m128d __A)
194*e4b17023SJohn Marino {
195*e4b17023SJohn Marino _mm_store1_pd (__P, __A);
196*e4b17023SJohn Marino }
197*e4b17023SJohn Marino
198*e4b17023SJohn Marino /* Store two DPFP values in reverse order. The address must be aligned. */
199*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storer_pd(double * __P,__m128d __A)200*e4b17023SJohn Marino _mm_storer_pd (double *__P, __m128d __A)
201*e4b17023SJohn Marino {
202*e4b17023SJohn Marino _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
203*e4b17023SJohn Marino }
204*e4b17023SJohn Marino
205*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si32(__m128i __A)206*e4b17023SJohn Marino _mm_cvtsi128_si32 (__m128i __A)
207*e4b17023SJohn Marino {
208*e4b17023SJohn Marino return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
209*e4b17023SJohn Marino }
210*e4b17023SJohn Marino
211*e4b17023SJohn Marino #ifdef __x86_64__
212*e4b17023SJohn Marino /* Intel intrinsic. */
213*e4b17023SJohn Marino extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si64(__m128i __A)214*e4b17023SJohn Marino _mm_cvtsi128_si64 (__m128i __A)
215*e4b17023SJohn Marino {
216*e4b17023SJohn Marino return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
217*e4b17023SJohn Marino }
218*e4b17023SJohn Marino
219*e4b17023SJohn Marino /* Microsoft intrinsic. */
220*e4b17023SJohn Marino extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si64x(__m128i __A)221*e4b17023SJohn Marino _mm_cvtsi128_si64x (__m128i __A)
222*e4b17023SJohn Marino {
223*e4b17023SJohn Marino return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
224*e4b17023SJohn Marino }
225*e4b17023SJohn Marino #endif
226*e4b17023SJohn Marino
227*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pd(__m128d __A,__m128d __B)228*e4b17023SJohn Marino _mm_add_pd (__m128d __A, __m128d __B)
229*e4b17023SJohn Marino {
230*e4b17023SJohn Marino return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
231*e4b17023SJohn Marino }
232*e4b17023SJohn Marino
233*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_sd(__m128d __A,__m128d __B)234*e4b17023SJohn Marino _mm_add_sd (__m128d __A, __m128d __B)
235*e4b17023SJohn Marino {
236*e4b17023SJohn Marino return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
237*e4b17023SJohn Marino }
238*e4b17023SJohn Marino
239*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pd(__m128d __A,__m128d __B)240*e4b17023SJohn Marino _mm_sub_pd (__m128d __A, __m128d __B)
241*e4b17023SJohn Marino {
242*e4b17023SJohn Marino return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
243*e4b17023SJohn Marino }
244*e4b17023SJohn Marino
245*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_sd(__m128d __A,__m128d __B)246*e4b17023SJohn Marino _mm_sub_sd (__m128d __A, __m128d __B)
247*e4b17023SJohn Marino {
248*e4b17023SJohn Marino return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
249*e4b17023SJohn Marino }
250*e4b17023SJohn Marino
251*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_pd(__m128d __A,__m128d __B)252*e4b17023SJohn Marino _mm_mul_pd (__m128d __A, __m128d __B)
253*e4b17023SJohn Marino {
254*e4b17023SJohn Marino return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
255*e4b17023SJohn Marino }
256*e4b17023SJohn Marino
257*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_sd(__m128d __A,__m128d __B)258*e4b17023SJohn Marino _mm_mul_sd (__m128d __A, __m128d __B)
259*e4b17023SJohn Marino {
260*e4b17023SJohn Marino return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
261*e4b17023SJohn Marino }
262*e4b17023SJohn Marino
263*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_pd(__m128d __A,__m128d __B)264*e4b17023SJohn Marino _mm_div_pd (__m128d __A, __m128d __B)
265*e4b17023SJohn Marino {
266*e4b17023SJohn Marino return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
267*e4b17023SJohn Marino }
268*e4b17023SJohn Marino
269*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_sd(__m128d __A,__m128d __B)270*e4b17023SJohn Marino _mm_div_sd (__m128d __A, __m128d __B)
271*e4b17023SJohn Marino {
272*e4b17023SJohn Marino return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
273*e4b17023SJohn Marino }
274*e4b17023SJohn Marino
275*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_pd(__m128d __A)276*e4b17023SJohn Marino _mm_sqrt_pd (__m128d __A)
277*e4b17023SJohn Marino {
278*e4b17023SJohn Marino return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
279*e4b17023SJohn Marino }
280*e4b17023SJohn Marino
281*e4b17023SJohn Marino /* Return pair {sqrt (A[0), B[1]}. */
282*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_sd(__m128d __A,__m128d __B)283*e4b17023SJohn Marino _mm_sqrt_sd (__m128d __A, __m128d __B)
284*e4b17023SJohn Marino {
285*e4b17023SJohn Marino __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
286*e4b17023SJohn Marino return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
287*e4b17023SJohn Marino }
288*e4b17023SJohn Marino
289*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pd(__m128d __A,__m128d __B)290*e4b17023SJohn Marino _mm_min_pd (__m128d __A, __m128d __B)
291*e4b17023SJohn Marino {
292*e4b17023SJohn Marino return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
293*e4b17023SJohn Marino }
294*e4b17023SJohn Marino
295*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_sd(__m128d __A,__m128d __B)296*e4b17023SJohn Marino _mm_min_sd (__m128d __A, __m128d __B)
297*e4b17023SJohn Marino {
298*e4b17023SJohn Marino return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
299*e4b17023SJohn Marino }
300*e4b17023SJohn Marino
301*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pd(__m128d __A,__m128d __B)302*e4b17023SJohn Marino _mm_max_pd (__m128d __A, __m128d __B)
303*e4b17023SJohn Marino {
304*e4b17023SJohn Marino return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
305*e4b17023SJohn Marino }
306*e4b17023SJohn Marino
307*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_sd(__m128d __A,__m128d __B)308*e4b17023SJohn Marino _mm_max_sd (__m128d __A, __m128d __B)
309*e4b17023SJohn Marino {
310*e4b17023SJohn Marino return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
311*e4b17023SJohn Marino }
312*e4b17023SJohn Marino
313*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_pd(__m128d __A,__m128d __B)314*e4b17023SJohn Marino _mm_and_pd (__m128d __A, __m128d __B)
315*e4b17023SJohn Marino {
316*e4b17023SJohn Marino return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
317*e4b17023SJohn Marino }
318*e4b17023SJohn Marino
319*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_pd(__m128d __A,__m128d __B)320*e4b17023SJohn Marino _mm_andnot_pd (__m128d __A, __m128d __B)
321*e4b17023SJohn Marino {
322*e4b17023SJohn Marino return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
323*e4b17023SJohn Marino }
324*e4b17023SJohn Marino
325*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_pd(__m128d __A,__m128d __B)326*e4b17023SJohn Marino _mm_or_pd (__m128d __A, __m128d __B)
327*e4b17023SJohn Marino {
328*e4b17023SJohn Marino return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
329*e4b17023SJohn Marino }
330*e4b17023SJohn Marino
331*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_pd(__m128d __A,__m128d __B)332*e4b17023SJohn Marino _mm_xor_pd (__m128d __A, __m128d __B)
333*e4b17023SJohn Marino {
334*e4b17023SJohn Marino return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
335*e4b17023SJohn Marino }
336*e4b17023SJohn Marino
337*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pd(__m128d __A,__m128d __B)338*e4b17023SJohn Marino _mm_cmpeq_pd (__m128d __A, __m128d __B)
339*e4b17023SJohn Marino {
340*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
341*e4b17023SJohn Marino }
342*e4b17023SJohn Marino
343*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_pd(__m128d __A,__m128d __B)344*e4b17023SJohn Marino _mm_cmplt_pd (__m128d __A, __m128d __B)
345*e4b17023SJohn Marino {
346*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
347*e4b17023SJohn Marino }
348*e4b17023SJohn Marino
349*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_pd(__m128d __A,__m128d __B)350*e4b17023SJohn Marino _mm_cmple_pd (__m128d __A, __m128d __B)
351*e4b17023SJohn Marino {
352*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
353*e4b17023SJohn Marino }
354*e4b17023SJohn Marino
355*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pd(__m128d __A,__m128d __B)356*e4b17023SJohn Marino _mm_cmpgt_pd (__m128d __A, __m128d __B)
357*e4b17023SJohn Marino {
358*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
359*e4b17023SJohn Marino }
360*e4b17023SJohn Marino
361*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_pd(__m128d __A,__m128d __B)362*e4b17023SJohn Marino _mm_cmpge_pd (__m128d __A, __m128d __B)
363*e4b17023SJohn Marino {
364*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
365*e4b17023SJohn Marino }
366*e4b17023SJohn Marino
367*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_pd(__m128d __A,__m128d __B)368*e4b17023SJohn Marino _mm_cmpneq_pd (__m128d __A, __m128d __B)
369*e4b17023SJohn Marino {
370*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
371*e4b17023SJohn Marino }
372*e4b17023SJohn Marino
373*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_pd(__m128d __A,__m128d __B)374*e4b17023SJohn Marino _mm_cmpnlt_pd (__m128d __A, __m128d __B)
375*e4b17023SJohn Marino {
376*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
377*e4b17023SJohn Marino }
378*e4b17023SJohn Marino
379*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_pd(__m128d __A,__m128d __B)380*e4b17023SJohn Marino _mm_cmpnle_pd (__m128d __A, __m128d __B)
381*e4b17023SJohn Marino {
382*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
383*e4b17023SJohn Marino }
384*e4b17023SJohn Marino
385*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_pd(__m128d __A,__m128d __B)386*e4b17023SJohn Marino _mm_cmpngt_pd (__m128d __A, __m128d __B)
387*e4b17023SJohn Marino {
388*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
389*e4b17023SJohn Marino }
390*e4b17023SJohn Marino
391*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_pd(__m128d __A,__m128d __B)392*e4b17023SJohn Marino _mm_cmpnge_pd (__m128d __A, __m128d __B)
393*e4b17023SJohn Marino {
394*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
395*e4b17023SJohn Marino }
396*e4b17023SJohn Marino
397*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_pd(__m128d __A,__m128d __B)398*e4b17023SJohn Marino _mm_cmpord_pd (__m128d __A, __m128d __B)
399*e4b17023SJohn Marino {
400*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
401*e4b17023SJohn Marino }
402*e4b17023SJohn Marino
403*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_pd(__m128d __A,__m128d __B)404*e4b17023SJohn Marino _mm_cmpunord_pd (__m128d __A, __m128d __B)
405*e4b17023SJohn Marino {
406*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
407*e4b17023SJohn Marino }
408*e4b17023SJohn Marino
409*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_sd(__m128d __A,__m128d __B)410*e4b17023SJohn Marino _mm_cmpeq_sd (__m128d __A, __m128d __B)
411*e4b17023SJohn Marino {
412*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
413*e4b17023SJohn Marino }
414*e4b17023SJohn Marino
415*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_sd(__m128d __A,__m128d __B)416*e4b17023SJohn Marino _mm_cmplt_sd (__m128d __A, __m128d __B)
417*e4b17023SJohn Marino {
418*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
419*e4b17023SJohn Marino }
420*e4b17023SJohn Marino
421*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_sd(__m128d __A,__m128d __B)422*e4b17023SJohn Marino _mm_cmple_sd (__m128d __A, __m128d __B)
423*e4b17023SJohn Marino {
424*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
425*e4b17023SJohn Marino }
426*e4b17023SJohn Marino
427*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_sd(__m128d __A,__m128d __B)428*e4b17023SJohn Marino _mm_cmpgt_sd (__m128d __A, __m128d __B)
429*e4b17023SJohn Marino {
430*e4b17023SJohn Marino return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
431*e4b17023SJohn Marino (__v2df)
432*e4b17023SJohn Marino __builtin_ia32_cmpltsd ((__v2df) __B,
433*e4b17023SJohn Marino (__v2df)
434*e4b17023SJohn Marino __A));
435*e4b17023SJohn Marino }
436*e4b17023SJohn Marino
437*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_sd(__m128d __A,__m128d __B)438*e4b17023SJohn Marino _mm_cmpge_sd (__m128d __A, __m128d __B)
439*e4b17023SJohn Marino {
440*e4b17023SJohn Marino return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
441*e4b17023SJohn Marino (__v2df)
442*e4b17023SJohn Marino __builtin_ia32_cmplesd ((__v2df) __B,
443*e4b17023SJohn Marino (__v2df)
444*e4b17023SJohn Marino __A));
445*e4b17023SJohn Marino }
446*e4b17023SJohn Marino
447*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_sd(__m128d __A,__m128d __B)448*e4b17023SJohn Marino _mm_cmpneq_sd (__m128d __A, __m128d __B)
449*e4b17023SJohn Marino {
450*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
451*e4b17023SJohn Marino }
452*e4b17023SJohn Marino
453*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_sd(__m128d __A,__m128d __B)454*e4b17023SJohn Marino _mm_cmpnlt_sd (__m128d __A, __m128d __B)
455*e4b17023SJohn Marino {
456*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
457*e4b17023SJohn Marino }
458*e4b17023SJohn Marino
459*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_sd(__m128d __A,__m128d __B)460*e4b17023SJohn Marino _mm_cmpnle_sd (__m128d __A, __m128d __B)
461*e4b17023SJohn Marino {
462*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
463*e4b17023SJohn Marino }
464*e4b17023SJohn Marino
465*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_sd(__m128d __A,__m128d __B)466*e4b17023SJohn Marino _mm_cmpngt_sd (__m128d __A, __m128d __B)
467*e4b17023SJohn Marino {
468*e4b17023SJohn Marino return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
469*e4b17023SJohn Marino (__v2df)
470*e4b17023SJohn Marino __builtin_ia32_cmpnltsd ((__v2df) __B,
471*e4b17023SJohn Marino (__v2df)
472*e4b17023SJohn Marino __A));
473*e4b17023SJohn Marino }
474*e4b17023SJohn Marino
475*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_sd(__m128d __A,__m128d __B)476*e4b17023SJohn Marino _mm_cmpnge_sd (__m128d __A, __m128d __B)
477*e4b17023SJohn Marino {
478*e4b17023SJohn Marino return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
479*e4b17023SJohn Marino (__v2df)
480*e4b17023SJohn Marino __builtin_ia32_cmpnlesd ((__v2df) __B,
481*e4b17023SJohn Marino (__v2df)
482*e4b17023SJohn Marino __A));
483*e4b17023SJohn Marino }
484*e4b17023SJohn Marino
485*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_sd(__m128d __A,__m128d __B)486*e4b17023SJohn Marino _mm_cmpord_sd (__m128d __A, __m128d __B)
487*e4b17023SJohn Marino {
488*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
489*e4b17023SJohn Marino }
490*e4b17023SJohn Marino
491*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_sd(__m128d __A,__m128d __B)492*e4b17023SJohn Marino _mm_cmpunord_sd (__m128d __A, __m128d __B)
493*e4b17023SJohn Marino {
494*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
495*e4b17023SJohn Marino }
496*e4b17023SJohn Marino
497*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_sd(__m128d __A,__m128d __B)498*e4b17023SJohn Marino _mm_comieq_sd (__m128d __A, __m128d __B)
499*e4b17023SJohn Marino {
500*e4b17023SJohn Marino return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
501*e4b17023SJohn Marino }
502*e4b17023SJohn Marino
503*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comilt_sd(__m128d __A,__m128d __B)504*e4b17023SJohn Marino _mm_comilt_sd (__m128d __A, __m128d __B)
505*e4b17023SJohn Marino {
506*e4b17023SJohn Marino return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
507*e4b17023SJohn Marino }
508*e4b17023SJohn Marino
509*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comile_sd(__m128d __A,__m128d __B)510*e4b17023SJohn Marino _mm_comile_sd (__m128d __A, __m128d __B)
511*e4b17023SJohn Marino {
512*e4b17023SJohn Marino return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
513*e4b17023SJohn Marino }
514*e4b17023SJohn Marino
515*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comigt_sd(__m128d __A,__m128d __B)516*e4b17023SJohn Marino _mm_comigt_sd (__m128d __A, __m128d __B)
517*e4b17023SJohn Marino {
518*e4b17023SJohn Marino return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
519*e4b17023SJohn Marino }
520*e4b17023SJohn Marino
521*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comige_sd(__m128d __A,__m128d __B)522*e4b17023SJohn Marino _mm_comige_sd (__m128d __A, __m128d __B)
523*e4b17023SJohn Marino {
524*e4b17023SJohn Marino return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
525*e4b17023SJohn Marino }
526*e4b17023SJohn Marino
527*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comineq_sd(__m128d __A,__m128d __B)528*e4b17023SJohn Marino _mm_comineq_sd (__m128d __A, __m128d __B)
529*e4b17023SJohn Marino {
530*e4b17023SJohn Marino return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
531*e4b17023SJohn Marino }
532*e4b17023SJohn Marino
533*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomieq_sd(__m128d __A,__m128d __B)534*e4b17023SJohn Marino _mm_ucomieq_sd (__m128d __A, __m128d __B)
535*e4b17023SJohn Marino {
536*e4b17023SJohn Marino return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
537*e4b17023SJohn Marino }
538*e4b17023SJohn Marino
539*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomilt_sd(__m128d __A,__m128d __B)540*e4b17023SJohn Marino _mm_ucomilt_sd (__m128d __A, __m128d __B)
541*e4b17023SJohn Marino {
542*e4b17023SJohn Marino return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
543*e4b17023SJohn Marino }
544*e4b17023SJohn Marino
545*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomile_sd(__m128d __A,__m128d __B)546*e4b17023SJohn Marino _mm_ucomile_sd (__m128d __A, __m128d __B)
547*e4b17023SJohn Marino {
548*e4b17023SJohn Marino return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
549*e4b17023SJohn Marino }
550*e4b17023SJohn Marino
551*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomigt_sd(__m128d __A,__m128d __B)552*e4b17023SJohn Marino _mm_ucomigt_sd (__m128d __A, __m128d __B)
553*e4b17023SJohn Marino {
554*e4b17023SJohn Marino return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
555*e4b17023SJohn Marino }
556*e4b17023SJohn Marino
557*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomige_sd(__m128d __A,__m128d __B)558*e4b17023SJohn Marino _mm_ucomige_sd (__m128d __A, __m128d __B)
559*e4b17023SJohn Marino {
560*e4b17023SJohn Marino return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
561*e4b17023SJohn Marino }
562*e4b17023SJohn Marino
563*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomineq_sd(__m128d __A,__m128d __B)564*e4b17023SJohn Marino _mm_ucomineq_sd (__m128d __A, __m128d __B)
565*e4b17023SJohn Marino {
566*e4b17023SJohn Marino return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
567*e4b17023SJohn Marino }
568*e4b17023SJohn Marino
569*e4b17023SJohn Marino /* Create a vector of Qi, where i is the element number. */
570*e4b17023SJohn Marino
571*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi64x(long long __q1,long long __q0)572*e4b17023SJohn Marino _mm_set_epi64x (long long __q1, long long __q0)
573*e4b17023SJohn Marino {
574*e4b17023SJohn Marino return __extension__ (__m128i)(__v2di){ __q0, __q1 };
575*e4b17023SJohn Marino }
576*e4b17023SJohn Marino
577*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi64(__m64 __q1,__m64 __q0)578*e4b17023SJohn Marino _mm_set_epi64 (__m64 __q1, __m64 __q0)
579*e4b17023SJohn Marino {
580*e4b17023SJohn Marino return _mm_set_epi64x ((long long)__q1, (long long)__q0);
581*e4b17023SJohn Marino }
582*e4b17023SJohn Marino
583*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi32(int __q3,int __q2,int __q1,int __q0)584*e4b17023SJohn Marino _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
585*e4b17023SJohn Marino {
586*e4b17023SJohn Marino return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
587*e4b17023SJohn Marino }
588*e4b17023SJohn Marino
589*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi16(short __q7,short __q6,short __q5,short __q4,short __q3,short __q2,short __q1,short __q0)590*e4b17023SJohn Marino _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
591*e4b17023SJohn Marino short __q3, short __q2, short __q1, short __q0)
592*e4b17023SJohn Marino {
593*e4b17023SJohn Marino return __extension__ (__m128i)(__v8hi){
594*e4b17023SJohn Marino __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
595*e4b17023SJohn Marino }
596*e4b17023SJohn Marino
597*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi8(char __q15,char __q14,char __q13,char __q12,char __q11,char __q10,char __q09,char __q08,char __q07,char __q06,char __q05,char __q04,char __q03,char __q02,char __q01,char __q00)598*e4b17023SJohn Marino _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
599*e4b17023SJohn Marino char __q11, char __q10, char __q09, char __q08,
600*e4b17023SJohn Marino char __q07, char __q06, char __q05, char __q04,
601*e4b17023SJohn Marino char __q03, char __q02, char __q01, char __q00)
602*e4b17023SJohn Marino {
603*e4b17023SJohn Marino return __extension__ (__m128i)(__v16qi){
604*e4b17023SJohn Marino __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
605*e4b17023SJohn Marino __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
606*e4b17023SJohn Marino };
607*e4b17023SJohn Marino }
608*e4b17023SJohn Marino
609*e4b17023SJohn Marino /* Set all of the elements of the vector to A. */
610*e4b17023SJohn Marino
611*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi64x(long long __A)612*e4b17023SJohn Marino _mm_set1_epi64x (long long __A)
613*e4b17023SJohn Marino {
614*e4b17023SJohn Marino return _mm_set_epi64x (__A, __A);
615*e4b17023SJohn Marino }
616*e4b17023SJohn Marino
617*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi64(__m64 __A)618*e4b17023SJohn Marino _mm_set1_epi64 (__m64 __A)
619*e4b17023SJohn Marino {
620*e4b17023SJohn Marino return _mm_set_epi64 (__A, __A);
621*e4b17023SJohn Marino }
622*e4b17023SJohn Marino
623*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi32(int __A)624*e4b17023SJohn Marino _mm_set1_epi32 (int __A)
625*e4b17023SJohn Marino {
626*e4b17023SJohn Marino return _mm_set_epi32 (__A, __A, __A, __A);
627*e4b17023SJohn Marino }
628*e4b17023SJohn Marino
629*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi16(short __A)630*e4b17023SJohn Marino _mm_set1_epi16 (short __A)
631*e4b17023SJohn Marino {
632*e4b17023SJohn Marino return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
633*e4b17023SJohn Marino }
634*e4b17023SJohn Marino
635*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi8(char __A)636*e4b17023SJohn Marino _mm_set1_epi8 (char __A)
637*e4b17023SJohn Marino {
638*e4b17023SJohn Marino return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
639*e4b17023SJohn Marino __A, __A, __A, __A, __A, __A, __A, __A);
640*e4b17023SJohn Marino }
641*e4b17023SJohn Marino
642*e4b17023SJohn Marino /* Create a vector of Qi, where i is the element number.
643*e4b17023SJohn Marino The parameter order is reversed from the _mm_set_epi* functions. */
644*e4b17023SJohn Marino
645*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi64(__m64 __q0,__m64 __q1)646*e4b17023SJohn Marino _mm_setr_epi64 (__m64 __q0, __m64 __q1)
647*e4b17023SJohn Marino {
648*e4b17023SJohn Marino return _mm_set_epi64 (__q1, __q0);
649*e4b17023SJohn Marino }
650*e4b17023SJohn Marino
651*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi32(int __q0,int __q1,int __q2,int __q3)652*e4b17023SJohn Marino _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
653*e4b17023SJohn Marino {
654*e4b17023SJohn Marino return _mm_set_epi32 (__q3, __q2, __q1, __q0);
655*e4b17023SJohn Marino }
656*e4b17023SJohn Marino
657*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi16(short __q0,short __q1,short __q2,short __q3,short __q4,short __q5,short __q6,short __q7)658*e4b17023SJohn Marino _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
659*e4b17023SJohn Marino short __q4, short __q5, short __q6, short __q7)
660*e4b17023SJohn Marino {
661*e4b17023SJohn Marino return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
662*e4b17023SJohn Marino }
663*e4b17023SJohn Marino
664*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi8(char __q00,char __q01,char __q02,char __q03,char __q04,char __q05,char __q06,char __q07,char __q08,char __q09,char __q10,char __q11,char __q12,char __q13,char __q14,char __q15)665*e4b17023SJohn Marino _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
666*e4b17023SJohn Marino char __q04, char __q05, char __q06, char __q07,
667*e4b17023SJohn Marino char __q08, char __q09, char __q10, char __q11,
668*e4b17023SJohn Marino char __q12, char __q13, char __q14, char __q15)
669*e4b17023SJohn Marino {
670*e4b17023SJohn Marino return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
671*e4b17023SJohn Marino __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
672*e4b17023SJohn Marino }
673*e4b17023SJohn Marino
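/* Load and store 128-bit integer vectors.  _mm_load_si128 and
   _mm_loadu_si128 read a whole vector from *P; _mm_loadl_epi64 creates a
   vector with element 0 as the 64 bits at *P and the rest zero.  */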
675*e4b17023SJohn Marino
676*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_si128(__m128i const * __P)677*e4b17023SJohn Marino _mm_load_si128 (__m128i const *__P)
678*e4b17023SJohn Marino {
679*e4b17023SJohn Marino return *__P;
680*e4b17023SJohn Marino }
681*e4b17023SJohn Marino
682*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_si128(__m128i const * __P)683*e4b17023SJohn Marino _mm_loadu_si128 (__m128i const *__P)
684*e4b17023SJohn Marino {
685*e4b17023SJohn Marino return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
686*e4b17023SJohn Marino }
687*e4b17023SJohn Marino
688*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_epi64(__m128i const * __P)689*e4b17023SJohn Marino _mm_loadl_epi64 (__m128i const *__P)
690*e4b17023SJohn Marino {
691*e4b17023SJohn Marino return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
692*e4b17023SJohn Marino }
693*e4b17023SJohn Marino
694*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_si128(__m128i * __P,__m128i __B)695*e4b17023SJohn Marino _mm_store_si128 (__m128i *__P, __m128i __B)
696*e4b17023SJohn Marino {
697*e4b17023SJohn Marino *__P = __B;
698*e4b17023SJohn Marino }
699*e4b17023SJohn Marino
700*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_si128(__m128i * __P,__m128i __B)701*e4b17023SJohn Marino _mm_storeu_si128 (__m128i *__P, __m128i __B)
702*e4b17023SJohn Marino {
703*e4b17023SJohn Marino __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
704*e4b17023SJohn Marino }
705*e4b17023SJohn Marino
706*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_epi64(__m128i * __P,__m128i __B)707*e4b17023SJohn Marino _mm_storel_epi64 (__m128i *__P, __m128i __B)
708*e4b17023SJohn Marino {
709*e4b17023SJohn Marino *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
710*e4b17023SJohn Marino }
711*e4b17023SJohn Marino
712*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_pi64(__m128i __B)713*e4b17023SJohn Marino _mm_movepi64_pi64 (__m128i __B)
714*e4b17023SJohn Marino {
715*e4b17023SJohn Marino return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
716*e4b17023SJohn Marino }
717*e4b17023SJohn Marino
718*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movpi64_epi64(__m64 __A)719*e4b17023SJohn Marino _mm_movpi64_epi64 (__m64 __A)
720*e4b17023SJohn Marino {
721*e4b17023SJohn Marino return _mm_set_epi64 ((__m64)0LL, __A);
722*e4b17023SJohn Marino }
723*e4b17023SJohn Marino
724*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_epi64(__m128i __A)725*e4b17023SJohn Marino _mm_move_epi64 (__m128i __A)
726*e4b17023SJohn Marino {
727*e4b17023SJohn Marino return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
728*e4b17023SJohn Marino }
729*e4b17023SJohn Marino
730*e4b17023SJohn Marino /* Create a vector of zeros. */
731*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si128(void)732*e4b17023SJohn Marino _mm_setzero_si128 (void)
733*e4b17023SJohn Marino {
734*e4b17023SJohn Marino return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
735*e4b17023SJohn Marino }
736*e4b17023SJohn Marino
737*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_pd(__m128i __A)738*e4b17023SJohn Marino _mm_cvtepi32_pd (__m128i __A)
739*e4b17023SJohn Marino {
740*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
741*e4b17023SJohn Marino }
742*e4b17023SJohn Marino
743*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_ps(__m128i __A)744*e4b17023SJohn Marino _mm_cvtepi32_ps (__m128i __A)
745*e4b17023SJohn Marino {
746*e4b17023SJohn Marino return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
747*e4b17023SJohn Marino }
748*e4b17023SJohn Marino
749*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_epi32(__m128d __A)750*e4b17023SJohn Marino _mm_cvtpd_epi32 (__m128d __A)
751*e4b17023SJohn Marino {
752*e4b17023SJohn Marino return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
753*e4b17023SJohn Marino }
754*e4b17023SJohn Marino
755*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_pi32(__m128d __A)756*e4b17023SJohn Marino _mm_cvtpd_pi32 (__m128d __A)
757*e4b17023SJohn Marino {
758*e4b17023SJohn Marino return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
759*e4b17023SJohn Marino }
760*e4b17023SJohn Marino
761*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_ps(__m128d __A)762*e4b17023SJohn Marino _mm_cvtpd_ps (__m128d __A)
763*e4b17023SJohn Marino {
764*e4b17023SJohn Marino return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
765*e4b17023SJohn Marino }
766*e4b17023SJohn Marino
767*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epi32(__m128d __A)768*e4b17023SJohn Marino _mm_cvttpd_epi32 (__m128d __A)
769*e4b17023SJohn Marino {
770*e4b17023SJohn Marino return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
771*e4b17023SJohn Marino }
772*e4b17023SJohn Marino
773*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_pi32(__m128d __A)774*e4b17023SJohn Marino _mm_cvttpd_pi32 (__m128d __A)
775*e4b17023SJohn Marino {
776*e4b17023SJohn Marino return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
777*e4b17023SJohn Marino }
778*e4b17023SJohn Marino
779*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32_pd(__m64 __A)780*e4b17023SJohn Marino _mm_cvtpi32_pd (__m64 __A)
781*e4b17023SJohn Marino {
782*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
783*e4b17023SJohn Marino }
784*e4b17023SJohn Marino
785*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epi32(__m128 __A)786*e4b17023SJohn Marino _mm_cvtps_epi32 (__m128 __A)
787*e4b17023SJohn Marino {
788*e4b17023SJohn Marino return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
789*e4b17023SJohn Marino }
790*e4b17023SJohn Marino
791*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epi32(__m128 __A)792*e4b17023SJohn Marino _mm_cvttps_epi32 (__m128 __A)
793*e4b17023SJohn Marino {
794*e4b17023SJohn Marino return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
795*e4b17023SJohn Marino }
796*e4b17023SJohn Marino
797*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pd(__m128 __A)798*e4b17023SJohn Marino _mm_cvtps_pd (__m128 __A)
799*e4b17023SJohn Marino {
800*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
801*e4b17023SJohn Marino }
802*e4b17023SJohn Marino
803*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si32(__m128d __A)804*e4b17023SJohn Marino _mm_cvtsd_si32 (__m128d __A)
805*e4b17023SJohn Marino {
806*e4b17023SJohn Marino return __builtin_ia32_cvtsd2si ((__v2df) __A);
807*e4b17023SJohn Marino }
808*e4b17023SJohn Marino
809*e4b17023SJohn Marino #ifdef __x86_64__
810*e4b17023SJohn Marino /* Intel intrinsic. */
811*e4b17023SJohn Marino extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64(__m128d __A)812*e4b17023SJohn Marino _mm_cvtsd_si64 (__m128d __A)
813*e4b17023SJohn Marino {
814*e4b17023SJohn Marino return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
815*e4b17023SJohn Marino }
816*e4b17023SJohn Marino
817*e4b17023SJohn Marino /* Microsoft intrinsic. */
818*e4b17023SJohn Marino extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64x(__m128d __A)819*e4b17023SJohn Marino _mm_cvtsd_si64x (__m128d __A)
820*e4b17023SJohn Marino {
821*e4b17023SJohn Marino return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
822*e4b17023SJohn Marino }
823*e4b17023SJohn Marino #endif
824*e4b17023SJohn Marino
825*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si32(__m128d __A)826*e4b17023SJohn Marino _mm_cvttsd_si32 (__m128d __A)
827*e4b17023SJohn Marino {
828*e4b17023SJohn Marino return __builtin_ia32_cvttsd2si ((__v2df) __A);
829*e4b17023SJohn Marino }
830*e4b17023SJohn Marino
831*e4b17023SJohn Marino #ifdef __x86_64__
832*e4b17023SJohn Marino /* Intel intrinsic. */
833*e4b17023SJohn Marino extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64(__m128d __A)834*e4b17023SJohn Marino _mm_cvttsd_si64 (__m128d __A)
835*e4b17023SJohn Marino {
836*e4b17023SJohn Marino return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
837*e4b17023SJohn Marino }
838*e4b17023SJohn Marino
839*e4b17023SJohn Marino /* Microsoft intrinsic. */
840*e4b17023SJohn Marino extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64x(__m128d __A)841*e4b17023SJohn Marino _mm_cvttsd_si64x (__m128d __A)
842*e4b17023SJohn Marino {
843*e4b17023SJohn Marino return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
844*e4b17023SJohn Marino }
845*e4b17023SJohn Marino #endif
846*e4b17023SJohn Marino
847*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_ss(__m128 __A,__m128d __B)848*e4b17023SJohn Marino _mm_cvtsd_ss (__m128 __A, __m128d __B)
849*e4b17023SJohn Marino {
850*e4b17023SJohn Marino return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
851*e4b17023SJohn Marino }
852*e4b17023SJohn Marino
853*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_sd(__m128d __A,int __B)854*e4b17023SJohn Marino _mm_cvtsi32_sd (__m128d __A, int __B)
855*e4b17023SJohn Marino {
856*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
857*e4b17023SJohn Marino }
858*e4b17023SJohn Marino
859*e4b17023SJohn Marino #ifdef __x86_64__
860*e4b17023SJohn Marino /* Intel intrinsic. */
861*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_sd(__m128d __A,long long __B)862*e4b17023SJohn Marino _mm_cvtsi64_sd (__m128d __A, long long __B)
863*e4b17023SJohn Marino {
864*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
865*e4b17023SJohn Marino }
866*e4b17023SJohn Marino
867*e4b17023SJohn Marino /* Microsoft intrinsic. */
868*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_sd(__m128d __A,long long __B)869*e4b17023SJohn Marino _mm_cvtsi64x_sd (__m128d __A, long long __B)
870*e4b17023SJohn Marino {
871*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
872*e4b17023SJohn Marino }
873*e4b17023SJohn Marino #endif
874*e4b17023SJohn Marino
875*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_sd(__m128d __A,__m128 __B)876*e4b17023SJohn Marino _mm_cvtss_sd (__m128d __A, __m128 __B)
877*e4b17023SJohn Marino {
878*e4b17023SJohn Marino return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
879*e4b17023SJohn Marino }
880*e4b17023SJohn Marino
/* Shuffle through __builtin_ia32_shufpd.  When __OPTIMIZE__ is defined
   this is an inline function; otherwise it is a macro that hands the
   selector expression to the builtin directly.  */
#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  const __v2df __lhs = (__v2df)__A;
  const __v2df __rhs = (__v2df)__B;

  /* __mask is forwarded untouched, without an intermediate copy.  */
  return (__m128d)__builtin_ia32_shufpd (__lhs, __rhs, __mask);
}
#else
#define _mm_shuffle_pd(A, B, N)						\
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),		\
				   (__v2df)(__m128d)(B), (int)(N)))
#endif
892*e4b17023SJohn Marino
893*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pd(__m128d __A,__m128d __B)894*e4b17023SJohn Marino _mm_unpackhi_pd (__m128d __A, __m128d __B)
895*e4b17023SJohn Marino {
896*e4b17023SJohn Marino return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
897*e4b17023SJohn Marino }
898*e4b17023SJohn Marino
899*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pd(__m128d __A,__m128d __B)900*e4b17023SJohn Marino _mm_unpacklo_pd (__m128d __A, __m128d __B)
901*e4b17023SJohn Marino {
902*e4b17023SJohn Marino return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
903*e4b17023SJohn Marino }
904*e4b17023SJohn Marino
905*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pd(__m128d __A,double const * __B)906*e4b17023SJohn Marino _mm_loadh_pd (__m128d __A, double const *__B)
907*e4b17023SJohn Marino {
908*e4b17023SJohn Marino return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
909*e4b17023SJohn Marino }
910*e4b17023SJohn Marino
911*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pd(__m128d __A,double const * __B)912*e4b17023SJohn Marino _mm_loadl_pd (__m128d __A, double const *__B)
913*e4b17023SJohn Marino {
914*e4b17023SJohn Marino return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
915*e4b17023SJohn Marino }
916*e4b17023SJohn Marino
917*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pd(__m128d __A)918*e4b17023SJohn Marino _mm_movemask_pd (__m128d __A)
919*e4b17023SJohn Marino {
920*e4b17023SJohn Marino return __builtin_ia32_movmskpd ((__v2df)__A);
921*e4b17023SJohn Marino }
922*e4b17023SJohn Marino
923*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi16(__m128i __A,__m128i __B)924*e4b17023SJohn Marino _mm_packs_epi16 (__m128i __A, __m128i __B)
925*e4b17023SJohn Marino {
926*e4b17023SJohn Marino return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
927*e4b17023SJohn Marino }
928*e4b17023SJohn Marino
929*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi32(__m128i __A,__m128i __B)930*e4b17023SJohn Marino _mm_packs_epi32 (__m128i __A, __m128i __B)
931*e4b17023SJohn Marino {
932*e4b17023SJohn Marino return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
933*e4b17023SJohn Marino }
934*e4b17023SJohn Marino
935*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi16(__m128i __A,__m128i __B)936*e4b17023SJohn Marino _mm_packus_epi16 (__m128i __A, __m128i __B)
937*e4b17023SJohn Marino {
938*e4b17023SJohn Marino return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
939*e4b17023SJohn Marino }
940*e4b17023SJohn Marino
941*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi8(__m128i __A,__m128i __B)942*e4b17023SJohn Marino _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
943*e4b17023SJohn Marino {
944*e4b17023SJohn Marino return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
945*e4b17023SJohn Marino }
946*e4b17023SJohn Marino
947*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi16(__m128i __A,__m128i __B)948*e4b17023SJohn Marino _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
949*e4b17023SJohn Marino {
950*e4b17023SJohn Marino return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
951*e4b17023SJohn Marino }
952*e4b17023SJohn Marino
953*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi32(__m128i __A,__m128i __B)954*e4b17023SJohn Marino _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
955*e4b17023SJohn Marino {
956*e4b17023SJohn Marino return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
957*e4b17023SJohn Marino }
958*e4b17023SJohn Marino
959*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi64(__m128i __A,__m128i __B)960*e4b17023SJohn Marino _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
961*e4b17023SJohn Marino {
962*e4b17023SJohn Marino return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
963*e4b17023SJohn Marino }
964*e4b17023SJohn Marino
965*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi8(__m128i __A,__m128i __B)966*e4b17023SJohn Marino _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
967*e4b17023SJohn Marino {
968*e4b17023SJohn Marino return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
969*e4b17023SJohn Marino }
970*e4b17023SJohn Marino
971*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi16(__m128i __A,__m128i __B)972*e4b17023SJohn Marino _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
973*e4b17023SJohn Marino {
974*e4b17023SJohn Marino return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
975*e4b17023SJohn Marino }
976*e4b17023SJohn Marino
977*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi32(__m128i __A,__m128i __B)978*e4b17023SJohn Marino _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
979*e4b17023SJohn Marino {
980*e4b17023SJohn Marino return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
981*e4b17023SJohn Marino }
982*e4b17023SJohn Marino
983*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi64(__m128i __A,__m128i __B)984*e4b17023SJohn Marino _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
985*e4b17023SJohn Marino {
986*e4b17023SJohn Marino return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
987*e4b17023SJohn Marino }
988*e4b17023SJohn Marino
989*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi8(__m128i __A,__m128i __B)990*e4b17023SJohn Marino _mm_add_epi8 (__m128i __A, __m128i __B)
991*e4b17023SJohn Marino {
992*e4b17023SJohn Marino return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
993*e4b17023SJohn Marino }
994*e4b17023SJohn Marino
995*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi16(__m128i __A,__m128i __B)996*e4b17023SJohn Marino _mm_add_epi16 (__m128i __A, __m128i __B)
997*e4b17023SJohn Marino {
998*e4b17023SJohn Marino return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
999*e4b17023SJohn Marino }
1000*e4b17023SJohn Marino
1001*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi32(__m128i __A,__m128i __B)1002*e4b17023SJohn Marino _mm_add_epi32 (__m128i __A, __m128i __B)
1003*e4b17023SJohn Marino {
1004*e4b17023SJohn Marino return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
1005*e4b17023SJohn Marino }
1006*e4b17023SJohn Marino
1007*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi64(__m128i __A,__m128i __B)1008*e4b17023SJohn Marino _mm_add_epi64 (__m128i __A, __m128i __B)
1009*e4b17023SJohn Marino {
1010*e4b17023SJohn Marino return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
1011*e4b17023SJohn Marino }
1012*e4b17023SJohn Marino
1013*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi8(__m128i __A,__m128i __B)1014*e4b17023SJohn Marino _mm_adds_epi8 (__m128i __A, __m128i __B)
1015*e4b17023SJohn Marino {
1016*e4b17023SJohn Marino return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1017*e4b17023SJohn Marino }
1018*e4b17023SJohn Marino
1019*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi16(__m128i __A,__m128i __B)1020*e4b17023SJohn Marino _mm_adds_epi16 (__m128i __A, __m128i __B)
1021*e4b17023SJohn Marino {
1022*e4b17023SJohn Marino return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1023*e4b17023SJohn Marino }
1024*e4b17023SJohn Marino
1025*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu8(__m128i __A,__m128i __B)1026*e4b17023SJohn Marino _mm_adds_epu8 (__m128i __A, __m128i __B)
1027*e4b17023SJohn Marino {
1028*e4b17023SJohn Marino return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1029*e4b17023SJohn Marino }
1030*e4b17023SJohn Marino
1031*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu16(__m128i __A,__m128i __B)1032*e4b17023SJohn Marino _mm_adds_epu16 (__m128i __A, __m128i __B)
1033*e4b17023SJohn Marino {
1034*e4b17023SJohn Marino return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1035*e4b17023SJohn Marino }
1036*e4b17023SJohn Marino
1037*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi8(__m128i __A,__m128i __B)1038*e4b17023SJohn Marino _mm_sub_epi8 (__m128i __A, __m128i __B)
1039*e4b17023SJohn Marino {
1040*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
1041*e4b17023SJohn Marino }
1042*e4b17023SJohn Marino
1043*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi16(__m128i __A,__m128i __B)1044*e4b17023SJohn Marino _mm_sub_epi16 (__m128i __A, __m128i __B)
1045*e4b17023SJohn Marino {
1046*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
1047*e4b17023SJohn Marino }
1048*e4b17023SJohn Marino
1049*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi32(__m128i __A,__m128i __B)1050*e4b17023SJohn Marino _mm_sub_epi32 (__m128i __A, __m128i __B)
1051*e4b17023SJohn Marino {
1052*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
1053*e4b17023SJohn Marino }
1054*e4b17023SJohn Marino
1055*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi64(__m128i __A,__m128i __B)1056*e4b17023SJohn Marino _mm_sub_epi64 (__m128i __A, __m128i __B)
1057*e4b17023SJohn Marino {
1058*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
1059*e4b17023SJohn Marino }
1060*e4b17023SJohn Marino
1061*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi8(__m128i __A,__m128i __B)1062*e4b17023SJohn Marino _mm_subs_epi8 (__m128i __A, __m128i __B)
1063*e4b17023SJohn Marino {
1064*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1065*e4b17023SJohn Marino }
1066*e4b17023SJohn Marino
1067*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi16(__m128i __A,__m128i __B)1068*e4b17023SJohn Marino _mm_subs_epi16 (__m128i __A, __m128i __B)
1069*e4b17023SJohn Marino {
1070*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1071*e4b17023SJohn Marino }
1072*e4b17023SJohn Marino
1073*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu8(__m128i __A,__m128i __B)1074*e4b17023SJohn Marino _mm_subs_epu8 (__m128i __A, __m128i __B)
1075*e4b17023SJohn Marino {
1076*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1077*e4b17023SJohn Marino }
1078*e4b17023SJohn Marino
1079*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu16(__m128i __A,__m128i __B)1080*e4b17023SJohn Marino _mm_subs_epu16 (__m128i __A, __m128i __B)
1081*e4b17023SJohn Marino {
1082*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1083*e4b17023SJohn Marino }
1084*e4b17023SJohn Marino
1085*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_epi16(__m128i __A,__m128i __B)1086*e4b17023SJohn Marino _mm_madd_epi16 (__m128i __A, __m128i __B)
1087*e4b17023SJohn Marino {
1088*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1089*e4b17023SJohn Marino }
1090*e4b17023SJohn Marino
1091*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_epi16(__m128i __A,__m128i __B)1092*e4b17023SJohn Marino _mm_mulhi_epi16 (__m128i __A, __m128i __B)
1093*e4b17023SJohn Marino {
1094*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1095*e4b17023SJohn Marino }
1096*e4b17023SJohn Marino
1097*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi16(__m128i __A,__m128i __B)1098*e4b17023SJohn Marino _mm_mullo_epi16 (__m128i __A, __m128i __B)
1099*e4b17023SJohn Marino {
1100*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
1101*e4b17023SJohn Marino }
1102*e4b17023SJohn Marino
1103*e4b17023SJohn Marino extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_su32(__m64 __A,__m64 __B)1104*e4b17023SJohn Marino _mm_mul_su32 (__m64 __A, __m64 __B)
1105*e4b17023SJohn Marino {
1106*e4b17023SJohn Marino return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1107*e4b17023SJohn Marino }
1108*e4b17023SJohn Marino
1109*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epu32(__m128i __A,__m128i __B)1110*e4b17023SJohn Marino _mm_mul_epu32 (__m128i __A, __m128i __B)
1111*e4b17023SJohn Marino {
1112*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1113*e4b17023SJohn Marino }
1114*e4b17023SJohn Marino
1115*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi16(__m128i __A,int __B)1116*e4b17023SJohn Marino _mm_slli_epi16 (__m128i __A, int __B)
1117*e4b17023SJohn Marino {
1118*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
1119*e4b17023SJohn Marino }
1120*e4b17023SJohn Marino
1121*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi32(__m128i __A,int __B)1122*e4b17023SJohn Marino _mm_slli_epi32 (__m128i __A, int __B)
1123*e4b17023SJohn Marino {
1124*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
1125*e4b17023SJohn Marino }
1126*e4b17023SJohn Marino
1127*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi64(__m128i __A,int __B)1128*e4b17023SJohn Marino _mm_slli_epi64 (__m128i __A, int __B)
1129*e4b17023SJohn Marino {
1130*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
1131*e4b17023SJohn Marino }
1132*e4b17023SJohn Marino
1133*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi16(__m128i __A,int __B)1134*e4b17023SJohn Marino _mm_srai_epi16 (__m128i __A, int __B)
1135*e4b17023SJohn Marino {
1136*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
1137*e4b17023SJohn Marino }
1138*e4b17023SJohn Marino
1139*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi32(__m128i __A,int __B)1140*e4b17023SJohn Marino _mm_srai_epi32 (__m128i __A, int __B)
1141*e4b17023SJohn Marino {
1142*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
1143*e4b17023SJohn Marino }
1144*e4b17023SJohn Marino
/* _mm_srli_si128 and _mm_slli_si128 hand the whole vector and the count
   multiplied by 8 to __builtin_ia32_psrldqi128 / __builtin_ia32_pslldqi128.
   The multiplication presumably converts a byte count into the bit count
   the builtin expects -- confirm against the builtin's documentation.

   With __OPTIMIZE__ defined they are inline functions; otherwise they are
   macros expanding to the same builtin call.  NOTE(review): the macro
   fallback is presumably needed because the builtin wants a compile-time
   constant, which a non-inlined function parameter cannot supply.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}
#else
#define _mm_srli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_slli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif
1163*e4b17023SJohn Marino
1164*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi16(__m128i __A,int __B)1165*e4b17023SJohn Marino _mm_srli_epi16 (__m128i __A, int __B)
1166*e4b17023SJohn Marino {
1167*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
1168*e4b17023SJohn Marino }
1169*e4b17023SJohn Marino
1170*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi32(__m128i __A,int __B)1171*e4b17023SJohn Marino _mm_srli_epi32 (__m128i __A, int __B)
1172*e4b17023SJohn Marino {
1173*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
1174*e4b17023SJohn Marino }
1175*e4b17023SJohn Marino
1176*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi64(__m128i __A,int __B)1177*e4b17023SJohn Marino _mm_srli_epi64 (__m128i __A, int __B)
1178*e4b17023SJohn Marino {
1179*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
1180*e4b17023SJohn Marino }
1181*e4b17023SJohn Marino
1182*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi16(__m128i __A,__m128i __B)1183*e4b17023SJohn Marino _mm_sll_epi16 (__m128i __A, __m128i __B)
1184*e4b17023SJohn Marino {
1185*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
1186*e4b17023SJohn Marino }
1187*e4b17023SJohn Marino
1188*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi32(__m128i __A,__m128i __B)1189*e4b17023SJohn Marino _mm_sll_epi32 (__m128i __A, __m128i __B)
1190*e4b17023SJohn Marino {
1191*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
1192*e4b17023SJohn Marino }
1193*e4b17023SJohn Marino
1194*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi64(__m128i __A,__m128i __B)1195*e4b17023SJohn Marino _mm_sll_epi64 (__m128i __A, __m128i __B)
1196*e4b17023SJohn Marino {
1197*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
1198*e4b17023SJohn Marino }
1199*e4b17023SJohn Marino
1200*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi16(__m128i __A,__m128i __B)1201*e4b17023SJohn Marino _mm_sra_epi16 (__m128i __A, __m128i __B)
1202*e4b17023SJohn Marino {
1203*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
1204*e4b17023SJohn Marino }
1205*e4b17023SJohn Marino
1206*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi32(__m128i __A,__m128i __B)1207*e4b17023SJohn Marino _mm_sra_epi32 (__m128i __A, __m128i __B)
1208*e4b17023SJohn Marino {
1209*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
1210*e4b17023SJohn Marino }
1211*e4b17023SJohn Marino
1212*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi16(__m128i __A,__m128i __B)1213*e4b17023SJohn Marino _mm_srl_epi16 (__m128i __A, __m128i __B)
1214*e4b17023SJohn Marino {
1215*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
1216*e4b17023SJohn Marino }
1217*e4b17023SJohn Marino
1218*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi32(__m128i __A,__m128i __B)1219*e4b17023SJohn Marino _mm_srl_epi32 (__m128i __A, __m128i __B)
1220*e4b17023SJohn Marino {
1221*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
1222*e4b17023SJohn Marino }
1223*e4b17023SJohn Marino
1224*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi64(__m128i __A,__m128i __B)1225*e4b17023SJohn Marino _mm_srl_epi64 (__m128i __A, __m128i __B)
1226*e4b17023SJohn Marino {
1227*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1228*e4b17023SJohn Marino }
1229*e4b17023SJohn Marino
1230*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si128(__m128i __A,__m128i __B)1231*e4b17023SJohn Marino _mm_and_si128 (__m128i __A, __m128i __B)
1232*e4b17023SJohn Marino {
1233*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
1234*e4b17023SJohn Marino }
1235*e4b17023SJohn Marino
1236*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si128(__m128i __A,__m128i __B)1237*e4b17023SJohn Marino _mm_andnot_si128 (__m128i __A, __m128i __B)
1238*e4b17023SJohn Marino {
1239*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1240*e4b17023SJohn Marino }
1241*e4b17023SJohn Marino
1242*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si128(__m128i __A,__m128i __B)1243*e4b17023SJohn Marino _mm_or_si128 (__m128i __A, __m128i __B)
1244*e4b17023SJohn Marino {
1245*e4b17023SJohn Marino return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
1246*e4b17023SJohn Marino }
1247*e4b17023SJohn Marino
1248*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si128(__m128i __A,__m128i __B)1249*e4b17023SJohn Marino _mm_xor_si128 (__m128i __A, __m128i __B)
1250*e4b17023SJohn Marino {
1251*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
1252*e4b17023SJohn Marino }
1253*e4b17023SJohn Marino
1254*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi8(__m128i __A,__m128i __B)1255*e4b17023SJohn Marino _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1256*e4b17023SJohn Marino {
1257*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
1258*e4b17023SJohn Marino }
1259*e4b17023SJohn Marino
1260*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi16(__m128i __A,__m128i __B)1261*e4b17023SJohn Marino _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1262*e4b17023SJohn Marino {
1263*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
1264*e4b17023SJohn Marino }
1265*e4b17023SJohn Marino
1266*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi32(__m128i __A,__m128i __B)1267*e4b17023SJohn Marino _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1268*e4b17023SJohn Marino {
1269*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
1270*e4b17023SJohn Marino }
1271*e4b17023SJohn Marino
1272*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi8(__m128i __A,__m128i __B)1273*e4b17023SJohn Marino _mm_cmplt_epi8 (__m128i __A, __m128i __B)
1274*e4b17023SJohn Marino {
1275*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
1276*e4b17023SJohn Marino }
1277*e4b17023SJohn Marino
1278*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi16(__m128i __A,__m128i __B)1279*e4b17023SJohn Marino _mm_cmplt_epi16 (__m128i __A, __m128i __B)
1280*e4b17023SJohn Marino {
1281*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
1282*e4b17023SJohn Marino }
1283*e4b17023SJohn Marino
1284*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi32(__m128i __A,__m128i __B)1285*e4b17023SJohn Marino _mm_cmplt_epi32 (__m128i __A, __m128i __B)
1286*e4b17023SJohn Marino {
1287*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
1288*e4b17023SJohn Marino }
1289*e4b17023SJohn Marino
1290*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi8(__m128i __A,__m128i __B)1291*e4b17023SJohn Marino _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1292*e4b17023SJohn Marino {
1293*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
1294*e4b17023SJohn Marino }
1295*e4b17023SJohn Marino
1296*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi16(__m128i __A,__m128i __B)1297*e4b17023SJohn Marino _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1298*e4b17023SJohn Marino {
1299*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
1300*e4b17023SJohn Marino }
1301*e4b17023SJohn Marino
1302*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi32(__m128i __A,__m128i __B)1303*e4b17023SJohn Marino _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1304*e4b17023SJohn Marino {
1305*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
1306*e4b17023SJohn Marino }
1307*e4b17023SJohn Marino
/* _mm_extract_epi16 reads element __N of __A (viewed as __v8hi) through
   __builtin_ia32_vec_ext_v8hi.  The builtin's result is cast to unsigned
   short before becoming the int return value, so it is zero-extended
   rather than sign-extended.

   _mm_insert_epi16 passes __A (viewed as __v8hi), the value __D and the
   index __N to __builtin_ia32_vec_set_v8hi and returns the resulting
   vector as an __m128i.

   With __OPTIMIZE__ defined both are inline functions; otherwise they
   are macros expanding to the same builtin calls.  NOTE(review): the
   macro fallback is presumably needed because the index must be a
   compile-time constant -- confirm.  */
#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
}
#else
#define _mm_extract_epi16(A, N) \
  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_insert_epi16(A, D, N)				\
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A),	\
					  (int)(D), (int)(N)))
#endif
1327*e4b17023SJohn Marino
1328*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi16(__m128i __A,__m128i __B)1329*e4b17023SJohn Marino _mm_max_epi16 (__m128i __A, __m128i __B)
1330*e4b17023SJohn Marino {
1331*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1332*e4b17023SJohn Marino }
1333*e4b17023SJohn Marino
1334*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu8(__m128i __A,__m128i __B)1335*e4b17023SJohn Marino _mm_max_epu8 (__m128i __A, __m128i __B)
1336*e4b17023SJohn Marino {
1337*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1338*e4b17023SJohn Marino }
1339*e4b17023SJohn Marino
1340*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi16(__m128i __A,__m128i __B)1341*e4b17023SJohn Marino _mm_min_epi16 (__m128i __A, __m128i __B)
1342*e4b17023SJohn Marino {
1343*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1344*e4b17023SJohn Marino }
1345*e4b17023SJohn Marino
1346*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu8(__m128i __A,__m128i __B)1347*e4b17023SJohn Marino _mm_min_epu8 (__m128i __A, __m128i __B)
1348*e4b17023SJohn Marino {
1349*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1350*e4b17023SJohn Marino }
1351*e4b17023SJohn Marino
1352*e4b17023SJohn Marino extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_epi8(__m128i __A)1353*e4b17023SJohn Marino _mm_movemask_epi8 (__m128i __A)
1354*e4b17023SJohn Marino {
1355*e4b17023SJohn Marino return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1356*e4b17023SJohn Marino }
1357*e4b17023SJohn Marino
1358*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_epu16(__m128i __A,__m128i __B)1359*e4b17023SJohn Marino _mm_mulhi_epu16 (__m128i __A, __m128i __B)
1360*e4b17023SJohn Marino {
1361*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
1362*e4b17023SJohn Marino }
1363*e4b17023SJohn Marino
/* Shuffle wrappers.  Each passes the vector and an integer selector to
   one builtin and returns the result as an __m128i:
     _mm_shufflehi_epi16 -> __builtin_ia32_pshufhw  (vector as __v8hi)
     _mm_shufflelo_epi16 -> __builtin_ia32_pshuflw  (vector as __v8hi)
     _mm_shuffle_epi32   -> __builtin_ia32_pshufd   (vector as __v4si)

   With __OPTIMIZE__ defined they are inline functions; otherwise they
   are macros expanding to the same builtin calls.  NOTE(review): the
   macro fallback is presumably needed because the selector must be a
   compile-time constant -- confirm.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
}
#else
#define _mm_shufflehi_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shufflelo_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shuffle_epi32(A, N) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
#endif
1390*e4b17023SJohn Marino
1391*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskmoveu_si128(__m128i __A,__m128i __B,char * __C)1392*e4b17023SJohn Marino _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1393*e4b17023SJohn Marino {
1394*e4b17023SJohn Marino __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1395*e4b17023SJohn Marino }
1396*e4b17023SJohn Marino
1397*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu8(__m128i __A,__m128i __B)1398*e4b17023SJohn Marino _mm_avg_epu8 (__m128i __A, __m128i __B)
1399*e4b17023SJohn Marino {
1400*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1401*e4b17023SJohn Marino }
1402*e4b17023SJohn Marino
1403*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu16(__m128i __A,__m128i __B)1404*e4b17023SJohn Marino _mm_avg_epu16 (__m128i __A, __m128i __B)
1405*e4b17023SJohn Marino {
1406*e4b17023SJohn Marino return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1407*e4b17023SJohn Marino }
1408*e4b17023SJohn Marino
1409*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sad_epu8(__m128i __A,__m128i __B)1410*e4b17023SJohn Marino _mm_sad_epu8 (__m128i __A, __m128i __B)
1411*e4b17023SJohn Marino {
1412*e4b17023SJohn Marino return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
1413*e4b17023SJohn Marino }
1414*e4b17023SJohn Marino
/* Hand the destination pointer __A and the value __B to
   __builtin_ia32_movnti.  Nothing is returned; the effect happens
   through __A.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
  __builtin_ia32_movnti (__A, __B);
}
1420*e4b17023SJohn Marino
/* Only available when __x86_64__ is defined.  Hand the destination
   pointer __A and the value __B to __builtin_ia32_movnti64.  Nothing is
   returned; the effect happens through __A.  */
#ifdef __x86_64__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si64 (long long int *__A, long long int __B)
{
  __builtin_ia32_movnti64 (__A, __B);
}
#endif
1428*e4b17023SJohn Marino
1429*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si128(__m128i * __A,__m128i __B)1430*e4b17023SJohn Marino _mm_stream_si128 (__m128i *__A, __m128i __B)
1431*e4b17023SJohn Marino {
1432*e4b17023SJohn Marino __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1433*e4b17023SJohn Marino }
1434*e4b17023SJohn Marino
1435*e4b17023SJohn Marino extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_pd(double * __A,__m128d __B)1436*e4b17023SJohn Marino _mm_stream_pd (double *__A, __m128d __B)
1437*e4b17023SJohn Marino {
1438*e4b17023SJohn Marino __builtin_ia32_movntpd (__A, (__v2df)__B);
1439*e4b17023SJohn Marino }
1440*e4b17023SJohn Marino
/* Pass the address __A to __builtin_ia32_clflush.  Nothing is returned
   and the pointer is const-qualified.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
}
1446*e4b17023SJohn Marino
/* Invoke __builtin_ia32_lfence.  Takes no arguments and returns
   nothing.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}
1452*e4b17023SJohn Marino
/* Invoke __builtin_ia32_mfence.  Takes no arguments and returns
   nothing.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}
1458*e4b17023SJohn Marino
1459*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si128(int __A)1460*e4b17023SJohn Marino _mm_cvtsi32_si128 (int __A)
1461*e4b17023SJohn Marino {
1462*e4b17023SJohn Marino return _mm_set_epi32 (0, 0, 0, __A);
1463*e4b17023SJohn Marino }
1464*e4b17023SJohn Marino
1465*e4b17023SJohn Marino #ifdef __x86_64__
1466*e4b17023SJohn Marino /* Intel intrinsic. */
1467*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si128(long long __A)1468*e4b17023SJohn Marino _mm_cvtsi64_si128 (long long __A)
1469*e4b17023SJohn Marino {
1470*e4b17023SJohn Marino return _mm_set_epi64x (0, __A);
1471*e4b17023SJohn Marino }
1472*e4b17023SJohn Marino
1473*e4b17023SJohn Marino /* Microsoft intrinsic. */
1474*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si128(long long __A)1475*e4b17023SJohn Marino _mm_cvtsi64x_si128 (long long __A)
1476*e4b17023SJohn Marino {
1477*e4b17023SJohn Marino return _mm_set_epi64x (0, __A);
1478*e4b17023SJohn Marino }
1479*e4b17023SJohn Marino #endif
1480*e4b17023SJohn Marino
/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type.  */
1483*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_ps(__m128d __A)1484*e4b17023SJohn Marino _mm_castpd_ps(__m128d __A)
1485*e4b17023SJohn Marino {
1486*e4b17023SJohn Marino return (__m128) __A;
1487*e4b17023SJohn Marino }
1488*e4b17023SJohn Marino
1489*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_si128(__m128d __A)1490*e4b17023SJohn Marino _mm_castpd_si128(__m128d __A)
1491*e4b17023SJohn Marino {
1492*e4b17023SJohn Marino return (__m128i) __A;
1493*e4b17023SJohn Marino }
1494*e4b17023SJohn Marino
1495*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_pd(__m128 __A)1496*e4b17023SJohn Marino _mm_castps_pd(__m128 __A)
1497*e4b17023SJohn Marino {
1498*e4b17023SJohn Marino return (__m128d) __A;
1499*e4b17023SJohn Marino }
1500*e4b17023SJohn Marino
1501*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_si128(__m128 __A)1502*e4b17023SJohn Marino _mm_castps_si128(__m128 __A)
1503*e4b17023SJohn Marino {
1504*e4b17023SJohn Marino return (__m128i) __A;
1505*e4b17023SJohn Marino }
1506*e4b17023SJohn Marino
1507*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_ps(__m128i __A)1508*e4b17023SJohn Marino _mm_castsi128_ps(__m128i __A)
1509*e4b17023SJohn Marino {
1510*e4b17023SJohn Marino return (__m128) __A;
1511*e4b17023SJohn Marino }
1512*e4b17023SJohn Marino
1513*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_pd(__m128i __A)1514*e4b17023SJohn Marino _mm_castsi128_pd(__m128i __A)
1515*e4b17023SJohn Marino {
1516*e4b17023SJohn Marino return (__m128d) __A;
1517*e4b17023SJohn Marino }
1518*e4b17023SJohn Marino
1519*e4b17023SJohn Marino #endif /* __SSE2__ */
1520*e4b17023SJohn Marino
1521*e4b17023SJohn Marino #endif /* _EMMINTRIN_H_INCLUDED */
1522