xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/emmintrin.h (revision 95059079af47f9a66a175f374f2da1a5020e3255)
138fd1498Szrj /* Copyright (C) 2003-2018 Free Software Foundation, Inc.
238fd1498Szrj 
338fd1498Szrj    This file is part of GCC.
438fd1498Szrj 
538fd1498Szrj    GCC is free software; you can redistribute it and/or modify
638fd1498Szrj    it under the terms of the GNU General Public License as published by
738fd1498Szrj    the Free Software Foundation; either version 3, or (at your option)
838fd1498Szrj    any later version.
938fd1498Szrj 
1038fd1498Szrj    GCC is distributed in the hope that it will be useful,
1138fd1498Szrj    but WITHOUT ANY WARRANTY; without even the implied warranty of
1238fd1498Szrj    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1338fd1498Szrj    GNU General Public License for more details.
1438fd1498Szrj 
1538fd1498Szrj    Under Section 7 of GPL version 3, you are granted additional
1638fd1498Szrj    permissions described in the GCC Runtime Library Exception, version
1738fd1498Szrj    3.1, as published by the Free Software Foundation.
1838fd1498Szrj 
1938fd1498Szrj    You should have received a copy of the GNU General Public License and
2038fd1498Szrj    a copy of the GCC Runtime Library Exception along with this program;
2138fd1498Szrj    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
2238fd1498Szrj    <http://www.gnu.org/licenses/>.  */
2338fd1498Szrj 
2438fd1498Szrj /* Implemented from the specification included in the Intel C++ Compiler
2538fd1498Szrj    User Guide and Reference, version 9.0.  */
2638fd1498Szrj 
2738fd1498Szrj #ifndef _EMMINTRIN_H_INCLUDED
2838fd1498Szrj #define _EMMINTRIN_H_INCLUDED
2938fd1498Szrj 
/* We need definitions from the SSE header files.  */
3138fd1498Szrj #include <xmmintrin.h>
3238fd1498Szrj 
/* When SSE2 is not already enabled, save the current option state, switch
   the target to "sse2" for the definitions that follow, and record that
   this was done by defining __DISABLE_SSE2__.  Presumably the matching
   pop_options sits at the end of the header, outside this view.  */
#ifndef __SSE2__
#pragma GCC push_options
#pragma GCC target("sse2")
#define __DISABLE_SSE2__
#endif /* __SSE2__ */
3838fd1498Szrj 
/* SSE2 */
/* Internal 16-byte vector types.  Each name spells the element count and
   element kind: df = double, di/du = signed/unsigned long long,
   si/su = signed/unsigned int, hi/hu = signed/unsigned short,
   qi/qs/qu = plain/signed/unsigned char.  */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__ ((__vector_size__ (16)));
typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
typedef char __v16qi __attribute__ ((__vector_size__ (16)));
typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));
5038fd1498Szrj 
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  Hence both public 16-byte
   types carry __may_alias__.  */
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
5538fd1498Szrj 
/* Unaligned version of the same types: identical layout, but declared with
   __aligned__ (1) so that accesses through them assume no alignment.  */
typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
5938fd1498Szrj 
/* Create a selector for use with the SHUFPD instruction.  FP1 lands in
   bit 1 and FP0 in bit 0 of the resulting value.  */
#define _MM_SHUFFLE2(fp1,fp0) \
 (((fp1) << 1) | (fp0))
6338fd1498Szrj 
6438fd1498Szrj /* Create a vector with element 0 as F and the rest zero.  */
6538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_sd(double __F)6638fd1498Szrj _mm_set_sd (double __F)
6738fd1498Szrj {
6838fd1498Szrj   return __extension__ (__m128d){ __F, 0.0 };
6938fd1498Szrj }
7038fd1498Szrj 
7138fd1498Szrj /* Create a vector with both elements equal to F.  */
7238fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pd(double __F)7338fd1498Szrj _mm_set1_pd (double __F)
7438fd1498Szrj {
7538fd1498Szrj   return __extension__ (__m128d){ __F, __F };
7638fd1498Szrj }
7738fd1498Szrj 
7838fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pd1(double __F)7938fd1498Szrj _mm_set_pd1 (double __F)
8038fd1498Szrj {
8138fd1498Szrj   return _mm_set1_pd (__F);
8238fd1498Szrj }
8338fd1498Szrj 
8438fd1498Szrj /* Create a vector with the lower value X and upper value W.  */
8538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pd(double __W,double __X)8638fd1498Szrj _mm_set_pd (double __W, double __X)
8738fd1498Szrj {
8838fd1498Szrj   return __extension__ (__m128d){ __X, __W };
8938fd1498Szrj }
9038fd1498Szrj 
9138fd1498Szrj /* Create a vector with the lower value W and upper value X.  */
9238fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pd(double __W,double __X)9338fd1498Szrj _mm_setr_pd (double __W, double __X)
9438fd1498Szrj {
9538fd1498Szrj   return __extension__ (__m128d){ __W, __X };
9638fd1498Szrj }
9738fd1498Szrj 
9838fd1498Szrj /* Create an undefined vector.  */
9938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_pd(void)10038fd1498Szrj _mm_undefined_pd (void)
10138fd1498Szrj {
10238fd1498Szrj   __m128d __Y = __Y;
10338fd1498Szrj   return __Y;
10438fd1498Szrj }
10538fd1498Szrj 
10638fd1498Szrj /* Create a vector of zeros.  */
10738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_pd(void)10838fd1498Szrj _mm_setzero_pd (void)
10938fd1498Szrj {
11038fd1498Szrj   return __extension__ (__m128d){ 0.0, 0.0 };
11138fd1498Szrj }
11238fd1498Szrj 
11338fd1498Szrj /* Sets the low DPFP value of A from the low value of B.  */
11438fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_sd(__m128d __A,__m128d __B)11538fd1498Szrj _mm_move_sd (__m128d __A, __m128d __B)
11638fd1498Szrj {
11738fd1498Szrj   return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
11838fd1498Szrj }
11938fd1498Szrj 
12038fd1498Szrj /* Load two DPFP values from P.  The address must be 16-byte aligned.  */
12138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_pd(double const * __P)12238fd1498Szrj _mm_load_pd (double const *__P)
12338fd1498Szrj {
12438fd1498Szrj   return *(__m128d *)__P;
12538fd1498Szrj }
12638fd1498Szrj 
12738fd1498Szrj /* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
12838fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_pd(double const * __P)12938fd1498Szrj _mm_loadu_pd (double const *__P)
13038fd1498Szrj {
13138fd1498Szrj   return *(__m128d_u *)__P;
13238fd1498Szrj }
13338fd1498Szrj 
13438fd1498Szrj /* Create a vector with all two elements equal to *P.  */
13538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load1_pd(double const * __P)13638fd1498Szrj _mm_load1_pd (double const *__P)
13738fd1498Szrj {
13838fd1498Szrj   return _mm_set1_pd (*__P);
13938fd1498Szrj }
14038fd1498Szrj 
14138fd1498Szrj /* Create a vector with element 0 as *P and the rest zero.  */
14238fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sd(double const * __P)14338fd1498Szrj _mm_load_sd (double const *__P)
14438fd1498Szrj {
14538fd1498Szrj   return _mm_set_sd (*__P);
14638fd1498Szrj }
14738fd1498Szrj 
14838fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_pd1(double const * __P)14938fd1498Szrj _mm_load_pd1 (double const *__P)
15038fd1498Szrj {
15138fd1498Szrj   return _mm_load1_pd (__P);
15238fd1498Szrj }
15338fd1498Szrj 
15438fd1498Szrj /* Load two DPFP values in reverse order.  The address must be aligned.  */
15538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadr_pd(double const * __P)15638fd1498Szrj _mm_loadr_pd (double const *__P)
15738fd1498Szrj {
15838fd1498Szrj   __m128d __tmp = _mm_load_pd (__P);
15938fd1498Szrj   return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
16038fd1498Szrj }
16138fd1498Szrj 
16238fd1498Szrj /* Store two DPFP values.  The address must be 16-byte aligned.  */
16338fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_pd(double * __P,__m128d __A)16438fd1498Szrj _mm_store_pd (double *__P, __m128d __A)
16538fd1498Szrj {
16638fd1498Szrj   *(__m128d *)__P = __A;
16738fd1498Szrj }
16838fd1498Szrj 
16938fd1498Szrj /* Store two DPFP values.  The address need not be 16-byte aligned.  */
17038fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_pd(double * __P,__m128d __A)17138fd1498Szrj _mm_storeu_pd (double *__P, __m128d __A)
17238fd1498Szrj {
17338fd1498Szrj   *(__m128d_u *)__P = __A;
17438fd1498Szrj }
17538fd1498Szrj 
17638fd1498Szrj /* Stores the lower DPFP value.  */
17738fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sd(double * __P,__m128d __A)17838fd1498Szrj _mm_store_sd (double *__P, __m128d __A)
17938fd1498Szrj {
18038fd1498Szrj   *__P = ((__v2df)__A)[0];
18138fd1498Szrj }
18238fd1498Szrj 
18338fd1498Szrj extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_f64(__m128d __A)18438fd1498Szrj _mm_cvtsd_f64 (__m128d __A)
18538fd1498Szrj {
18638fd1498Szrj   return ((__v2df)__A)[0];
18738fd1498Szrj }
18838fd1498Szrj 
18938fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pd(double * __P,__m128d __A)19038fd1498Szrj _mm_storel_pd (double *__P, __m128d __A)
19138fd1498Szrj {
19238fd1498Szrj   _mm_store_sd (__P, __A);
19338fd1498Szrj }
19438fd1498Szrj 
19538fd1498Szrj /* Stores the upper DPFP value.  */
19638fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeh_pd(double * __P,__m128d __A)19738fd1498Szrj _mm_storeh_pd (double *__P, __m128d __A)
19838fd1498Szrj {
19938fd1498Szrj   *__P = ((__v2df)__A)[1];
20038fd1498Szrj }
20138fd1498Szrj 
20238fd1498Szrj /* Store the lower DPFP value across two words.
20338fd1498Szrj    The address must be 16-byte aligned.  */
20438fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store1_pd(double * __P,__m128d __A)20538fd1498Szrj _mm_store1_pd (double *__P, __m128d __A)
20638fd1498Szrj {
20738fd1498Szrj   _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
20838fd1498Szrj }
20938fd1498Szrj 
21038fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_pd1(double * __P,__m128d __A)21138fd1498Szrj _mm_store_pd1 (double *__P, __m128d __A)
21238fd1498Szrj {
21338fd1498Szrj   _mm_store1_pd (__P, __A);
21438fd1498Szrj }
21538fd1498Szrj 
21638fd1498Szrj /* Store two DPFP values in reverse order.  The address must be aligned.  */
21738fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storer_pd(double * __P,__m128d __A)21838fd1498Szrj _mm_storer_pd (double *__P, __m128d __A)
21938fd1498Szrj {
22038fd1498Szrj   _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
22138fd1498Szrj }
22238fd1498Szrj 
22338fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si32(__m128i __A)22438fd1498Szrj _mm_cvtsi128_si32 (__m128i __A)
22538fd1498Szrj {
22638fd1498Szrj   return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
22738fd1498Szrj }
22838fd1498Szrj 
22938fd1498Szrj #ifdef __x86_64__
23038fd1498Szrj /* Intel intrinsic.  */
23138fd1498Szrj extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si64(__m128i __A)23238fd1498Szrj _mm_cvtsi128_si64 (__m128i __A)
23338fd1498Szrj {
23438fd1498Szrj   return ((__v2di)__A)[0];
23538fd1498Szrj }
23638fd1498Szrj 
23738fd1498Szrj /* Microsoft intrinsic.  */
23838fd1498Szrj extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi128_si64x(__m128i __A)23938fd1498Szrj _mm_cvtsi128_si64x (__m128i __A)
24038fd1498Szrj {
24138fd1498Szrj   return ((__v2di)__A)[0];
24238fd1498Szrj }
24338fd1498Szrj #endif
24438fd1498Szrj 
24538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pd(__m128d __A,__m128d __B)24638fd1498Szrj _mm_add_pd (__m128d __A, __m128d __B)
24738fd1498Szrj {
24838fd1498Szrj   return (__m128d) ((__v2df)__A + (__v2df)__B);
24938fd1498Szrj }
25038fd1498Szrj 
25138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_sd(__m128d __A,__m128d __B)25238fd1498Szrj _mm_add_sd (__m128d __A, __m128d __B)
25338fd1498Szrj {
25438fd1498Szrj   return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
25538fd1498Szrj }
25638fd1498Szrj 
25738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pd(__m128d __A,__m128d __B)25838fd1498Szrj _mm_sub_pd (__m128d __A, __m128d __B)
25938fd1498Szrj {
26038fd1498Szrj   return (__m128d) ((__v2df)__A - (__v2df)__B);
26138fd1498Szrj }
26238fd1498Szrj 
26338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_sd(__m128d __A,__m128d __B)26438fd1498Szrj _mm_sub_sd (__m128d __A, __m128d __B)
26538fd1498Szrj {
26638fd1498Szrj   return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
26738fd1498Szrj }
26838fd1498Szrj 
26938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_pd(__m128d __A,__m128d __B)27038fd1498Szrj _mm_mul_pd (__m128d __A, __m128d __B)
27138fd1498Szrj {
27238fd1498Szrj   return (__m128d) ((__v2df)__A * (__v2df)__B);
27338fd1498Szrj }
27438fd1498Szrj 
27538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_sd(__m128d __A,__m128d __B)27638fd1498Szrj _mm_mul_sd (__m128d __A, __m128d __B)
27738fd1498Szrj {
27838fd1498Szrj   return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
27938fd1498Szrj }
28038fd1498Szrj 
28138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_pd(__m128d __A,__m128d __B)28238fd1498Szrj _mm_div_pd (__m128d __A, __m128d __B)
28338fd1498Szrj {
28438fd1498Szrj   return (__m128d) ((__v2df)__A / (__v2df)__B);
28538fd1498Szrj }
28638fd1498Szrj 
28738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_sd(__m128d __A,__m128d __B)28838fd1498Szrj _mm_div_sd (__m128d __A, __m128d __B)
28938fd1498Szrj {
29038fd1498Szrj   return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
29138fd1498Szrj }
29238fd1498Szrj 
29338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_pd(__m128d __A)29438fd1498Szrj _mm_sqrt_pd (__m128d __A)
29538fd1498Szrj {
29638fd1498Szrj   return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
29738fd1498Szrj }
29838fd1498Szrj 
29938fd1498Szrj /* Return pair {sqrt (B[0]), A[1]}.  */
30038fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_sd(__m128d __A,__m128d __B)30138fd1498Szrj _mm_sqrt_sd (__m128d __A, __m128d __B)
30238fd1498Szrj {
30338fd1498Szrj   __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
30438fd1498Szrj   return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
30538fd1498Szrj }
30638fd1498Szrj 
30738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_pd(__m128d __A,__m128d __B)30838fd1498Szrj _mm_min_pd (__m128d __A, __m128d __B)
30938fd1498Szrj {
31038fd1498Szrj   return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
31138fd1498Szrj }
31238fd1498Szrj 
31338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_sd(__m128d __A,__m128d __B)31438fd1498Szrj _mm_min_sd (__m128d __A, __m128d __B)
31538fd1498Szrj {
31638fd1498Szrj   return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
31738fd1498Szrj }
31838fd1498Szrj 
31938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_pd(__m128d __A,__m128d __B)32038fd1498Szrj _mm_max_pd (__m128d __A, __m128d __B)
32138fd1498Szrj {
32238fd1498Szrj   return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
32338fd1498Szrj }
32438fd1498Szrj 
32538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_sd(__m128d __A,__m128d __B)32638fd1498Szrj _mm_max_sd (__m128d __A, __m128d __B)
32738fd1498Szrj {
32838fd1498Szrj   return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
32938fd1498Szrj }
33038fd1498Szrj 
33138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_pd(__m128d __A,__m128d __B)33238fd1498Szrj _mm_and_pd (__m128d __A, __m128d __B)
33338fd1498Szrj {
33438fd1498Szrj   return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
33538fd1498Szrj }
33638fd1498Szrj 
33738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_pd(__m128d __A,__m128d __B)33838fd1498Szrj _mm_andnot_pd (__m128d __A, __m128d __B)
33938fd1498Szrj {
34038fd1498Szrj   return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
34138fd1498Szrj }
34238fd1498Szrj 
34338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_pd(__m128d __A,__m128d __B)34438fd1498Szrj _mm_or_pd (__m128d __A, __m128d __B)
34538fd1498Szrj {
34638fd1498Szrj   return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
34738fd1498Szrj }
34838fd1498Szrj 
34938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_pd(__m128d __A,__m128d __B)35038fd1498Szrj _mm_xor_pd (__m128d __A, __m128d __B)
35138fd1498Szrj {
35238fd1498Szrj   return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
35338fd1498Szrj }
35438fd1498Szrj 
35538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pd(__m128d __A,__m128d __B)35638fd1498Szrj _mm_cmpeq_pd (__m128d __A, __m128d __B)
35738fd1498Szrj {
35838fd1498Szrj   return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
35938fd1498Szrj }
36038fd1498Szrj 
36138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_pd(__m128d __A,__m128d __B)36238fd1498Szrj _mm_cmplt_pd (__m128d __A, __m128d __B)
36338fd1498Szrj {
36438fd1498Szrj   return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
36538fd1498Szrj }
36638fd1498Szrj 
36738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_pd(__m128d __A,__m128d __B)36838fd1498Szrj _mm_cmple_pd (__m128d __A, __m128d __B)
36938fd1498Szrj {
37038fd1498Szrj   return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
37138fd1498Szrj }
37238fd1498Szrj 
37338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pd(__m128d __A,__m128d __B)37438fd1498Szrj _mm_cmpgt_pd (__m128d __A, __m128d __B)
37538fd1498Szrj {
37638fd1498Szrj   return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
37738fd1498Szrj }
37838fd1498Szrj 
37938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_pd(__m128d __A,__m128d __B)38038fd1498Szrj _mm_cmpge_pd (__m128d __A, __m128d __B)
38138fd1498Szrj {
38238fd1498Szrj   return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
38338fd1498Szrj }
38438fd1498Szrj 
38538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_pd(__m128d __A,__m128d __B)38638fd1498Szrj _mm_cmpneq_pd (__m128d __A, __m128d __B)
38738fd1498Szrj {
38838fd1498Szrj   return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
38938fd1498Szrj }
39038fd1498Szrj 
39138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_pd(__m128d __A,__m128d __B)39238fd1498Szrj _mm_cmpnlt_pd (__m128d __A, __m128d __B)
39338fd1498Szrj {
39438fd1498Szrj   return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
39538fd1498Szrj }
39638fd1498Szrj 
39738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_pd(__m128d __A,__m128d __B)39838fd1498Szrj _mm_cmpnle_pd (__m128d __A, __m128d __B)
39938fd1498Szrj {
40038fd1498Szrj   return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
40138fd1498Szrj }
40238fd1498Szrj 
40338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_pd(__m128d __A,__m128d __B)40438fd1498Szrj _mm_cmpngt_pd (__m128d __A, __m128d __B)
40538fd1498Szrj {
40638fd1498Szrj   return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
40738fd1498Szrj }
40838fd1498Szrj 
40938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_pd(__m128d __A,__m128d __B)41038fd1498Szrj _mm_cmpnge_pd (__m128d __A, __m128d __B)
41138fd1498Szrj {
41238fd1498Szrj   return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
41338fd1498Szrj }
41438fd1498Szrj 
41538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_pd(__m128d __A,__m128d __B)41638fd1498Szrj _mm_cmpord_pd (__m128d __A, __m128d __B)
41738fd1498Szrj {
41838fd1498Szrj   return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
41938fd1498Szrj }
42038fd1498Szrj 
42138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_pd(__m128d __A,__m128d __B)42238fd1498Szrj _mm_cmpunord_pd (__m128d __A, __m128d __B)
42338fd1498Szrj {
42438fd1498Szrj   return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
42538fd1498Szrj }
42638fd1498Szrj 
42738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_sd(__m128d __A,__m128d __B)42838fd1498Szrj _mm_cmpeq_sd (__m128d __A, __m128d __B)
42938fd1498Szrj {
43038fd1498Szrj   return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
43138fd1498Szrj }
43238fd1498Szrj 
43338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_sd(__m128d __A,__m128d __B)43438fd1498Szrj _mm_cmplt_sd (__m128d __A, __m128d __B)
43538fd1498Szrj {
43638fd1498Szrj   return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
43738fd1498Szrj }
43838fd1498Szrj 
43938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmple_sd(__m128d __A,__m128d __B)44038fd1498Szrj _mm_cmple_sd (__m128d __A, __m128d __B)
44138fd1498Szrj {
44238fd1498Szrj   return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
44338fd1498Szrj }
44438fd1498Szrj 
44538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_sd(__m128d __A,__m128d __B)44638fd1498Szrj _mm_cmpgt_sd (__m128d __A, __m128d __B)
44738fd1498Szrj {
44838fd1498Szrj   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
44938fd1498Szrj 					 (__v2df)
45038fd1498Szrj 					 __builtin_ia32_cmpltsd ((__v2df) __B,
45138fd1498Szrj 								 (__v2df)
45238fd1498Szrj 								 __A));
45338fd1498Szrj }
45438fd1498Szrj 
45538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpge_sd(__m128d __A,__m128d __B)45638fd1498Szrj _mm_cmpge_sd (__m128d __A, __m128d __B)
45738fd1498Szrj {
45838fd1498Szrj   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
45938fd1498Szrj 					 (__v2df)
46038fd1498Szrj 					 __builtin_ia32_cmplesd ((__v2df) __B,
46138fd1498Szrj 								 (__v2df)
46238fd1498Szrj 								 __A));
46338fd1498Szrj }
46438fd1498Szrj 
46538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpneq_sd(__m128d __A,__m128d __B)46638fd1498Szrj _mm_cmpneq_sd (__m128d __A, __m128d __B)
46738fd1498Szrj {
46838fd1498Szrj   return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
46938fd1498Szrj }
47038fd1498Szrj 
47138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnlt_sd(__m128d __A,__m128d __B)47238fd1498Szrj _mm_cmpnlt_sd (__m128d __A, __m128d __B)
47338fd1498Szrj {
47438fd1498Szrj   return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
47538fd1498Szrj }
47638fd1498Szrj 
47738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnle_sd(__m128d __A,__m128d __B)47838fd1498Szrj _mm_cmpnle_sd (__m128d __A, __m128d __B)
47938fd1498Szrj {
48038fd1498Szrj   return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
48138fd1498Szrj }
48238fd1498Szrj 
48338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpngt_sd(__m128d __A,__m128d __B)48438fd1498Szrj _mm_cmpngt_sd (__m128d __A, __m128d __B)
48538fd1498Szrj {
48638fd1498Szrj   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
48738fd1498Szrj 					 (__v2df)
48838fd1498Szrj 					 __builtin_ia32_cmpnltsd ((__v2df) __B,
48938fd1498Szrj 								  (__v2df)
49038fd1498Szrj 								  __A));
49138fd1498Szrj }
49238fd1498Szrj 
49338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpnge_sd(__m128d __A,__m128d __B)49438fd1498Szrj _mm_cmpnge_sd (__m128d __A, __m128d __B)
49538fd1498Szrj {
49638fd1498Szrj   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
49738fd1498Szrj 					 (__v2df)
49838fd1498Szrj 					 __builtin_ia32_cmpnlesd ((__v2df) __B,
49938fd1498Szrj 								  (__v2df)
50038fd1498Szrj 								  __A));
50138fd1498Szrj }
50238fd1498Szrj 
50338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpord_sd(__m128d __A,__m128d __B)50438fd1498Szrj _mm_cmpord_sd (__m128d __A, __m128d __B)
50538fd1498Szrj {
50638fd1498Szrj   return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
50738fd1498Szrj }
50838fd1498Szrj 
50938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpunord_sd(__m128d __A,__m128d __B)51038fd1498Szrj _mm_cmpunord_sd (__m128d __A, __m128d __B)
51138fd1498Szrj {
51238fd1498Szrj   return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
51338fd1498Szrj }
51438fd1498Szrj 
51538fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comieq_sd(__m128d __A,__m128d __B)51638fd1498Szrj _mm_comieq_sd (__m128d __A, __m128d __B)
51738fd1498Szrj {
51838fd1498Szrj   return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
51938fd1498Szrj }
52038fd1498Szrj 
52138fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comilt_sd(__m128d __A,__m128d __B)52238fd1498Szrj _mm_comilt_sd (__m128d __A, __m128d __B)
52338fd1498Szrj {
52438fd1498Szrj   return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
52538fd1498Szrj }
52638fd1498Szrj 
52738fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comile_sd(__m128d __A,__m128d __B)52838fd1498Szrj _mm_comile_sd (__m128d __A, __m128d __B)
52938fd1498Szrj {
53038fd1498Szrj   return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
53138fd1498Szrj }
53238fd1498Szrj 
53338fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comigt_sd(__m128d __A,__m128d __B)53438fd1498Szrj _mm_comigt_sd (__m128d __A, __m128d __B)
53538fd1498Szrj {
53638fd1498Szrj   return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
53738fd1498Szrj }
53838fd1498Szrj 
53938fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comige_sd(__m128d __A,__m128d __B)54038fd1498Szrj _mm_comige_sd (__m128d __A, __m128d __B)
54138fd1498Szrj {
54238fd1498Szrj   return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
54338fd1498Szrj }
54438fd1498Szrj 
54538fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_comineq_sd(__m128d __A,__m128d __B)54638fd1498Szrj _mm_comineq_sd (__m128d __A, __m128d __B)
54738fd1498Szrj {
54838fd1498Szrj   return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
54938fd1498Szrj }
55038fd1498Szrj 
55138fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomieq_sd(__m128d __A,__m128d __B)55238fd1498Szrj _mm_ucomieq_sd (__m128d __A, __m128d __B)
55338fd1498Szrj {
55438fd1498Szrj   return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
55538fd1498Szrj }
55638fd1498Szrj 
55738fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomilt_sd(__m128d __A,__m128d __B)55838fd1498Szrj _mm_ucomilt_sd (__m128d __A, __m128d __B)
55938fd1498Szrj {
56038fd1498Szrj   return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
56138fd1498Szrj }
56238fd1498Szrj 
56338fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomile_sd(__m128d __A,__m128d __B)56438fd1498Szrj _mm_ucomile_sd (__m128d __A, __m128d __B)
56538fd1498Szrj {
56638fd1498Szrj   return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
56738fd1498Szrj }
56838fd1498Szrj 
56938fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomigt_sd(__m128d __A,__m128d __B)57038fd1498Szrj _mm_ucomigt_sd (__m128d __A, __m128d __B)
57138fd1498Szrj {
57238fd1498Szrj   return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
57338fd1498Szrj }
57438fd1498Szrj 
57538fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomige_sd(__m128d __A,__m128d __B)57638fd1498Szrj _mm_ucomige_sd (__m128d __A, __m128d __B)
57738fd1498Szrj {
57838fd1498Szrj   return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
57938fd1498Szrj }
58038fd1498Szrj 
58138fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_ucomineq_sd(__m128d __A,__m128d __B)58238fd1498Szrj _mm_ucomineq_sd (__m128d __A, __m128d __B)
58338fd1498Szrj {
58438fd1498Szrj   return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
58538fd1498Szrj }
58638fd1498Szrj 
58738fd1498Szrj /* Create a vector of Qi, where i is the element number.  */
58838fd1498Szrj 
58938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi64x(long long __q1,long long __q0)59038fd1498Szrj _mm_set_epi64x (long long __q1, long long __q0)
59138fd1498Szrj {
59238fd1498Szrj   return __extension__ (__m128i)(__v2di){ __q0, __q1 };
59338fd1498Szrj }
59438fd1498Szrj 
59538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi64(__m64 __q1,__m64 __q0)59638fd1498Szrj _mm_set_epi64 (__m64 __q1,  __m64 __q0)
59738fd1498Szrj {
59838fd1498Szrj   return _mm_set_epi64x ((long long)__q1, (long long)__q0);
59938fd1498Szrj }
60038fd1498Szrj 
60138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi32(int __q3,int __q2,int __q1,int __q0)60238fd1498Szrj _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
60338fd1498Szrj {
60438fd1498Szrj   return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
60538fd1498Szrj }
60638fd1498Szrj 
60738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi16(short __q7,short __q6,short __q5,short __q4,short __q3,short __q2,short __q1,short __q0)60838fd1498Szrj _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
60938fd1498Szrj 	       short __q3, short __q2, short __q1, short __q0)
61038fd1498Szrj {
61138fd1498Szrj   return __extension__ (__m128i)(__v8hi){
61238fd1498Szrj     __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
61338fd1498Szrj }
61438fd1498Szrj 
61538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_epi8(char __q15,char __q14,char __q13,char __q12,char __q11,char __q10,char __q09,char __q08,char __q07,char __q06,char __q05,char __q04,char __q03,char __q02,char __q01,char __q00)61638fd1498Szrj _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
61738fd1498Szrj 	      char __q11, char __q10, char __q09, char __q08,
61838fd1498Szrj 	      char __q07, char __q06, char __q05, char __q04,
61938fd1498Szrj 	      char __q03, char __q02, char __q01, char __q00)
62038fd1498Szrj {
62138fd1498Szrj   return __extension__ (__m128i)(__v16qi){
62238fd1498Szrj     __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
62338fd1498Szrj     __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
62438fd1498Szrj   };
62538fd1498Szrj }
62638fd1498Szrj 
62738fd1498Szrj /* Set all of the elements of the vector to A.  */
62838fd1498Szrj 
62938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi64x(long long __A)63038fd1498Szrj _mm_set1_epi64x (long long __A)
63138fd1498Szrj {
63238fd1498Szrj   return _mm_set_epi64x (__A, __A);
63338fd1498Szrj }
63438fd1498Szrj 
63538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi64(__m64 __A)63638fd1498Szrj _mm_set1_epi64 (__m64 __A)
63738fd1498Szrj {
63838fd1498Szrj   return _mm_set_epi64 (__A, __A);
63938fd1498Szrj }
64038fd1498Szrj 
64138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi32(int __A)64238fd1498Szrj _mm_set1_epi32 (int __A)
64338fd1498Szrj {
64438fd1498Szrj   return _mm_set_epi32 (__A, __A, __A, __A);
64538fd1498Szrj }
64638fd1498Szrj 
64738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi16(short __A)64838fd1498Szrj _mm_set1_epi16 (short __A)
64938fd1498Szrj {
65038fd1498Szrj   return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
65138fd1498Szrj }
65238fd1498Szrj 
65338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_epi8(char __A)65438fd1498Szrj _mm_set1_epi8 (char __A)
65538fd1498Szrj {
65638fd1498Szrj   return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
65738fd1498Szrj 		       __A, __A, __A, __A, __A, __A, __A, __A);
65838fd1498Szrj }
65938fd1498Szrj 
66038fd1498Szrj /* Create a vector of Qi, where i is the element number.
66138fd1498Szrj    The parameter order is reversed from the _mm_set_epi* functions.  */
66238fd1498Szrj 
66338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi64(__m64 __q0,__m64 __q1)66438fd1498Szrj _mm_setr_epi64 (__m64 __q0, __m64 __q1)
66538fd1498Szrj {
66638fd1498Szrj   return _mm_set_epi64 (__q1, __q0);
66738fd1498Szrj }
66838fd1498Szrj 
66938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi32(int __q0,int __q1,int __q2,int __q3)67038fd1498Szrj _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
67138fd1498Szrj {
67238fd1498Szrj   return _mm_set_epi32 (__q3, __q2, __q1, __q0);
67338fd1498Szrj }
67438fd1498Szrj 
67538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi16(short __q0,short __q1,short __q2,short __q3,short __q4,short __q5,short __q6,short __q7)67638fd1498Szrj _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
67738fd1498Szrj 	        short __q4, short __q5, short __q6, short __q7)
67838fd1498Szrj {
67938fd1498Szrj   return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
68038fd1498Szrj }
68138fd1498Szrj 
68238fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_epi8(char __q00,char __q01,char __q02,char __q03,char __q04,char __q05,char __q06,char __q07,char __q08,char __q09,char __q10,char __q11,char __q12,char __q13,char __q14,char __q15)68338fd1498Szrj _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
68438fd1498Szrj 	       char __q04, char __q05, char __q06, char __q07,
68538fd1498Szrj 	       char __q08, char __q09, char __q10, char __q11,
68638fd1498Szrj 	       char __q12, char __q13, char __q14, char __q15)
68738fd1498Szrj {
68838fd1498Szrj   return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
68938fd1498Szrj 		       __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
69038fd1498Szrj }
69138fd1498Szrj 
/* Load and store integer vectors.  _mm_load_si128 and _mm_loadu_si128
   read a whole vector; _mm_loadl_epi64 creates a vector with element 0
   as *P and the rest zero.  */
69338fd1498Szrj 
69438fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_si128(__m128i const * __P)69538fd1498Szrj _mm_load_si128 (__m128i const *__P)
69638fd1498Szrj {
69738fd1498Szrj   return *__P;
69838fd1498Szrj }
69938fd1498Szrj 
70038fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_si128(__m128i_u const * __P)70138fd1498Szrj _mm_loadu_si128 (__m128i_u const *__P)
70238fd1498Szrj {
70338fd1498Szrj   return *__P;
70438fd1498Szrj }
70538fd1498Szrj 
70638fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_epi64(__m128i_u const * __P)70738fd1498Szrj _mm_loadl_epi64 (__m128i_u const *__P)
70838fd1498Szrj {
70938fd1498Szrj   return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P);
71038fd1498Szrj }
71138fd1498Szrj 
71238fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_si128(__m128i * __P,__m128i __B)71338fd1498Szrj _mm_store_si128 (__m128i *__P, __m128i __B)
71438fd1498Szrj {
71538fd1498Szrj   *__P = __B;
71638fd1498Szrj }
71738fd1498Szrj 
71838fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_si128(__m128i_u * __P,__m128i __B)71938fd1498Szrj _mm_storeu_si128 (__m128i_u *__P, __m128i __B)
72038fd1498Szrj {
72138fd1498Szrj   *__P = __B;
72238fd1498Szrj }
72338fd1498Szrj 
72438fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_epi64(__m128i_u * __P,__m128i __B)72538fd1498Szrj _mm_storel_epi64 (__m128i_u *__P, __m128i __B)
72638fd1498Szrj {
72738fd1498Szrj   *(__m64_u *)__P = (__m64) ((__v2di)__B)[0];
72838fd1498Szrj }
72938fd1498Szrj 
73038fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_pi64(__m128i __B)73138fd1498Szrj _mm_movepi64_pi64 (__m128i __B)
73238fd1498Szrj {
73338fd1498Szrj   return (__m64) ((__v2di)__B)[0];
73438fd1498Szrj }
73538fd1498Szrj 
73638fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movpi64_epi64(__m64 __A)73738fd1498Szrj _mm_movpi64_epi64 (__m64 __A)
73838fd1498Szrj {
73938fd1498Szrj   return _mm_set_epi64 ((__m64)0LL, __A);
74038fd1498Szrj }
74138fd1498Szrj 
74238fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_epi64(__m128i __A)74338fd1498Szrj _mm_move_epi64 (__m128i __A)
74438fd1498Szrj {
74538fd1498Szrj   return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
74638fd1498Szrj }
74738fd1498Szrj 
74838fd1498Szrj /* Create an undefined vector.  */
74938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_si128(void)75038fd1498Szrj _mm_undefined_si128 (void)
75138fd1498Szrj {
75238fd1498Szrj   __m128i __Y = __Y;
75338fd1498Szrj   return __Y;
75438fd1498Szrj }
75538fd1498Szrj 
75638fd1498Szrj /* Create a vector of zeros.  */
75738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si128(void)75838fd1498Szrj _mm_setzero_si128 (void)
75938fd1498Szrj {
76038fd1498Szrj   return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
76138fd1498Szrj }
76238fd1498Szrj 
76338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_pd(__m128i __A)76438fd1498Szrj _mm_cvtepi32_pd (__m128i __A)
76538fd1498Szrj {
76638fd1498Szrj   return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
76738fd1498Szrj }
76838fd1498Szrj 
76938fd1498Szrj extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_ps(__m128i __A)77038fd1498Szrj _mm_cvtepi32_ps (__m128i __A)
77138fd1498Szrj {
77238fd1498Szrj   return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
77338fd1498Szrj }
77438fd1498Szrj 
77538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_epi32(__m128d __A)77638fd1498Szrj _mm_cvtpd_epi32 (__m128d __A)
77738fd1498Szrj {
77838fd1498Szrj   return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
77938fd1498Szrj }
78038fd1498Szrj 
78138fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_pi32(__m128d __A)78238fd1498Szrj _mm_cvtpd_pi32 (__m128d __A)
78338fd1498Szrj {
78438fd1498Szrj   return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
78538fd1498Szrj }
78638fd1498Szrj 
78738fd1498Szrj extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_ps(__m128d __A)78838fd1498Szrj _mm_cvtpd_ps (__m128d __A)
78938fd1498Szrj {
79038fd1498Szrj   return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
79138fd1498Szrj }
79238fd1498Szrj 
79338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epi32(__m128d __A)79438fd1498Szrj _mm_cvttpd_epi32 (__m128d __A)
79538fd1498Szrj {
79638fd1498Szrj   return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
79738fd1498Szrj }
79838fd1498Szrj 
79938fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_pi32(__m128d __A)80038fd1498Szrj _mm_cvttpd_pi32 (__m128d __A)
80138fd1498Szrj {
80238fd1498Szrj   return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
80338fd1498Szrj }
80438fd1498Szrj 
80538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32_pd(__m64 __A)80638fd1498Szrj _mm_cvtpi32_pd (__m64 __A)
80738fd1498Szrj {
80838fd1498Szrj   return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
80938fd1498Szrj }
81038fd1498Szrj 
81138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epi32(__m128 __A)81238fd1498Szrj _mm_cvtps_epi32 (__m128 __A)
81338fd1498Szrj {
81438fd1498Szrj   return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
81538fd1498Szrj }
81638fd1498Szrj 
81738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epi32(__m128 __A)81838fd1498Szrj _mm_cvttps_epi32 (__m128 __A)
81938fd1498Szrj {
82038fd1498Szrj   return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
82138fd1498Szrj }
82238fd1498Szrj 
82338fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pd(__m128 __A)82438fd1498Szrj _mm_cvtps_pd (__m128 __A)
82538fd1498Szrj {
82638fd1498Szrj   return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
82738fd1498Szrj }
82838fd1498Szrj 
82938fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si32(__m128d __A)83038fd1498Szrj _mm_cvtsd_si32 (__m128d __A)
83138fd1498Szrj {
83238fd1498Szrj   return __builtin_ia32_cvtsd2si ((__v2df) __A);
83338fd1498Szrj }
83438fd1498Szrj 
83538fd1498Szrj #ifdef __x86_64__
83638fd1498Szrj /* Intel intrinsic.  */
83738fd1498Szrj extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64(__m128d __A)83838fd1498Szrj _mm_cvtsd_si64 (__m128d __A)
83938fd1498Szrj {
84038fd1498Szrj   return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
84138fd1498Szrj }
84238fd1498Szrj 
84338fd1498Szrj /* Microsoft intrinsic.  */
84438fd1498Szrj extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64x(__m128d __A)84538fd1498Szrj _mm_cvtsd_si64x (__m128d __A)
84638fd1498Szrj {
84738fd1498Szrj   return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
84838fd1498Szrj }
84938fd1498Szrj #endif
85038fd1498Szrj 
85138fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si32(__m128d __A)85238fd1498Szrj _mm_cvttsd_si32 (__m128d __A)
85338fd1498Szrj {
85438fd1498Szrj   return __builtin_ia32_cvttsd2si ((__v2df) __A);
85538fd1498Szrj }
85638fd1498Szrj 
85738fd1498Szrj #ifdef __x86_64__
85838fd1498Szrj /* Intel intrinsic.  */
85938fd1498Szrj extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64(__m128d __A)86038fd1498Szrj _mm_cvttsd_si64 (__m128d __A)
86138fd1498Szrj {
86238fd1498Szrj   return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
86338fd1498Szrj }
86438fd1498Szrj 
86538fd1498Szrj /* Microsoft intrinsic.  */
86638fd1498Szrj extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64x(__m128d __A)86738fd1498Szrj _mm_cvttsd_si64x (__m128d __A)
86838fd1498Szrj {
86938fd1498Szrj   return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
87038fd1498Szrj }
87138fd1498Szrj #endif
87238fd1498Szrj 
87338fd1498Szrj extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_ss(__m128 __A,__m128d __B)87438fd1498Szrj _mm_cvtsd_ss (__m128 __A, __m128d __B)
87538fd1498Szrj {
87638fd1498Szrj   return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
87738fd1498Szrj }
87838fd1498Szrj 
87938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_sd(__m128d __A,int __B)88038fd1498Szrj _mm_cvtsi32_sd (__m128d __A, int __B)
88138fd1498Szrj {
88238fd1498Szrj   return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
88338fd1498Szrj }
88438fd1498Szrj 
88538fd1498Szrj #ifdef __x86_64__
88638fd1498Szrj /* Intel intrinsic.  */
88738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_sd(__m128d __A,long long __B)88838fd1498Szrj _mm_cvtsi64_sd (__m128d __A, long long __B)
88938fd1498Szrj {
89038fd1498Szrj   return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
89138fd1498Szrj }
89238fd1498Szrj 
89338fd1498Szrj /* Microsoft intrinsic.  */
89438fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_sd(__m128d __A,long long __B)89538fd1498Szrj _mm_cvtsi64x_sd (__m128d __A, long long __B)
89638fd1498Szrj {
89738fd1498Szrj   return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
89838fd1498Szrj }
89938fd1498Szrj #endif
90038fd1498Szrj 
90138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_sd(__m128d __A,__m128 __B)90238fd1498Szrj _mm_cvtss_sd (__m128d __A, __m128 __B)
90338fd1498Szrj {
90438fd1498Szrj   return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
90538fd1498Szrj }
90638fd1498Szrj 
#ifdef __OPTIMIZE__
/* Apply __builtin_ia32_shufpd to __A and __B (both as __v2df) with the
   selector __mask.  __mask is handed to the builtin unchanged.  */
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd (__m128d __A, __m128d __B, const int __mask)
{
  const __v2df __lhs = (__v2df) __A;
  const __v2df __rhs = (__v2df) __B;
  return (__m128d) __builtin_ia32_shufpd (__lhs, __rhs, __mask);
}
#else
/* Macro form used when __OPTIMIZE__ is not defined; N is pasted
   directly into the builtin call.  */
#define _mm_shuffle_pd(A, B, N)						\
  ((__m128d) __builtin_ia32_shufpd ((__v2df) (__m128d) (A),		\
				    (__v2df) (__m128d) (B),		\
				    (int) (N)))
#endif
91838fd1498Szrj 
91938fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pd(__m128d __A,__m128d __B)92038fd1498Szrj _mm_unpackhi_pd (__m128d __A, __m128d __B)
92138fd1498Szrj {
92238fd1498Szrj   return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
92338fd1498Szrj }
92438fd1498Szrj 
92538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pd(__m128d __A,__m128d __B)92638fd1498Szrj _mm_unpacklo_pd (__m128d __A, __m128d __B)
92738fd1498Szrj {
92838fd1498Szrj   return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
92938fd1498Szrj }
93038fd1498Szrj 
93138fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pd(__m128d __A,double const * __B)93238fd1498Szrj _mm_loadh_pd (__m128d __A, double const *__B)
93338fd1498Szrj {
93438fd1498Szrj   return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
93538fd1498Szrj }
93638fd1498Szrj 
93738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pd(__m128d __A,double const * __B)93838fd1498Szrj _mm_loadl_pd (__m128d __A, double const *__B)
93938fd1498Szrj {
94038fd1498Szrj   return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
94138fd1498Szrj }
94238fd1498Szrj 
94338fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pd(__m128d __A)94438fd1498Szrj _mm_movemask_pd (__m128d __A)
94538fd1498Szrj {
94638fd1498Szrj   return __builtin_ia32_movmskpd ((__v2df)__A);
94738fd1498Szrj }
94838fd1498Szrj 
94938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi16(__m128i __A,__m128i __B)95038fd1498Szrj _mm_packs_epi16 (__m128i __A, __m128i __B)
95138fd1498Szrj {
95238fd1498Szrj   return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
95338fd1498Szrj }
95438fd1498Szrj 
95538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi32(__m128i __A,__m128i __B)95638fd1498Szrj _mm_packs_epi32 (__m128i __A, __m128i __B)
95738fd1498Szrj {
95838fd1498Szrj   return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
95938fd1498Szrj }
96038fd1498Szrj 
96138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi16(__m128i __A,__m128i __B)96238fd1498Szrj _mm_packus_epi16 (__m128i __A, __m128i __B)
96338fd1498Szrj {
96438fd1498Szrj   return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
96538fd1498Szrj }
96638fd1498Szrj 
96738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi8(__m128i __A,__m128i __B)96838fd1498Szrj _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
96938fd1498Szrj {
97038fd1498Szrj   return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
97138fd1498Szrj }
97238fd1498Szrj 
97338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi16(__m128i __A,__m128i __B)97438fd1498Szrj _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
97538fd1498Szrj {
97638fd1498Szrj   return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
97738fd1498Szrj }
97838fd1498Szrj 
97938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi32(__m128i __A,__m128i __B)98038fd1498Szrj _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
98138fd1498Szrj {
98238fd1498Szrj   return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
98338fd1498Szrj }
98438fd1498Szrj 
98538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi64(__m128i __A,__m128i __B)98638fd1498Szrj _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
98738fd1498Szrj {
98838fd1498Szrj   return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
98938fd1498Szrj }
99038fd1498Szrj 
99138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi8(__m128i __A,__m128i __B)99238fd1498Szrj _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
99338fd1498Szrj {
99438fd1498Szrj   return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
99538fd1498Szrj }
99638fd1498Szrj 
99738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi16(__m128i __A,__m128i __B)99838fd1498Szrj _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
99938fd1498Szrj {
100038fd1498Szrj   return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
100138fd1498Szrj }
100238fd1498Szrj 
100338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi32(__m128i __A,__m128i __B)100438fd1498Szrj _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
100538fd1498Szrj {
100638fd1498Szrj   return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
100738fd1498Szrj }
100838fd1498Szrj 
100938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi64(__m128i __A,__m128i __B)101038fd1498Szrj _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
101138fd1498Szrj {
101238fd1498Szrj   return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
101338fd1498Szrj }
101438fd1498Szrj 
101538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi8(__m128i __A,__m128i __B)101638fd1498Szrj _mm_add_epi8 (__m128i __A, __m128i __B)
101738fd1498Szrj {
101838fd1498Szrj   return (__m128i) ((__v16qu)__A + (__v16qu)__B);
101938fd1498Szrj }
102038fd1498Szrj 
102138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi16(__m128i __A,__m128i __B)102238fd1498Szrj _mm_add_epi16 (__m128i __A, __m128i __B)
102338fd1498Szrj {
102438fd1498Szrj   return (__m128i) ((__v8hu)__A + (__v8hu)__B);
102538fd1498Szrj }
102638fd1498Szrj 
102738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi32(__m128i __A,__m128i __B)102838fd1498Szrj _mm_add_epi32 (__m128i __A, __m128i __B)
102938fd1498Szrj {
103038fd1498Szrj   return (__m128i) ((__v4su)__A + (__v4su)__B);
103138fd1498Szrj }
103238fd1498Szrj 
103338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi64(__m128i __A,__m128i __B)103438fd1498Szrj _mm_add_epi64 (__m128i __A, __m128i __B)
103538fd1498Szrj {
103638fd1498Szrj   return (__m128i) ((__v2du)__A + (__v2du)__B);
103738fd1498Szrj }
103838fd1498Szrj 
103938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi8(__m128i __A,__m128i __B)104038fd1498Szrj _mm_adds_epi8 (__m128i __A, __m128i __B)
104138fd1498Szrj {
104238fd1498Szrj   return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
104338fd1498Szrj }
104438fd1498Szrj 
104538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi16(__m128i __A,__m128i __B)104638fd1498Szrj _mm_adds_epi16 (__m128i __A, __m128i __B)
104738fd1498Szrj {
104838fd1498Szrj   return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
104938fd1498Szrj }
105038fd1498Szrj 
105138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu8(__m128i __A,__m128i __B)105238fd1498Szrj _mm_adds_epu8 (__m128i __A, __m128i __B)
105338fd1498Szrj {
105438fd1498Szrj   return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
105538fd1498Szrj }
105638fd1498Szrj 
105738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu16(__m128i __A,__m128i __B)105838fd1498Szrj _mm_adds_epu16 (__m128i __A, __m128i __B)
105938fd1498Szrj {
106038fd1498Szrj   return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
106138fd1498Szrj }
106238fd1498Szrj 
106338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi8(__m128i __A,__m128i __B)106438fd1498Szrj _mm_sub_epi8 (__m128i __A, __m128i __B)
106538fd1498Szrj {
106638fd1498Szrj   return (__m128i) ((__v16qu)__A - (__v16qu)__B);
106738fd1498Szrj }
106838fd1498Szrj 
106938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi16(__m128i __A,__m128i __B)107038fd1498Szrj _mm_sub_epi16 (__m128i __A, __m128i __B)
107138fd1498Szrj {
107238fd1498Szrj   return (__m128i) ((__v8hu)__A - (__v8hu)__B);
107338fd1498Szrj }
107438fd1498Szrj 
107538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi32(__m128i __A,__m128i __B)107638fd1498Szrj _mm_sub_epi32 (__m128i __A, __m128i __B)
107738fd1498Szrj {
107838fd1498Szrj   return (__m128i) ((__v4su)__A - (__v4su)__B);
107938fd1498Szrj }
108038fd1498Szrj 
108138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi64(__m128i __A,__m128i __B)108238fd1498Szrj _mm_sub_epi64 (__m128i __A, __m128i __B)
108338fd1498Szrj {
108438fd1498Szrj   return (__m128i) ((__v2du)__A - (__v2du)__B);
108538fd1498Szrj }
108638fd1498Szrj 
108738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi8(__m128i __A,__m128i __B)108838fd1498Szrj _mm_subs_epi8 (__m128i __A, __m128i __B)
108938fd1498Szrj {
109038fd1498Szrj   return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
109138fd1498Szrj }
109238fd1498Szrj 
109338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi16(__m128i __A,__m128i __B)109438fd1498Szrj _mm_subs_epi16 (__m128i __A, __m128i __B)
109538fd1498Szrj {
109638fd1498Szrj   return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
109738fd1498Szrj }
109838fd1498Szrj 
109938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu8(__m128i __A,__m128i __B)110038fd1498Szrj _mm_subs_epu8 (__m128i __A, __m128i __B)
110138fd1498Szrj {
110238fd1498Szrj   return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
110338fd1498Szrj }
110438fd1498Szrj 
110538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu16(__m128i __A,__m128i __B)110638fd1498Szrj _mm_subs_epu16 (__m128i __A, __m128i __B)
110738fd1498Szrj {
110838fd1498Szrj   return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
110938fd1498Szrj }
111038fd1498Szrj 
111138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_epi16(__m128i __A,__m128i __B)111238fd1498Szrj _mm_madd_epi16 (__m128i __A, __m128i __B)
111338fd1498Szrj {
111438fd1498Szrj   return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
111538fd1498Szrj }
111638fd1498Szrj 
111738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_epi16(__m128i __A,__m128i __B)111838fd1498Szrj _mm_mulhi_epi16 (__m128i __A, __m128i __B)
111938fd1498Szrj {
112038fd1498Szrj   return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
112138fd1498Szrj }
112238fd1498Szrj 
112338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi16(__m128i __A,__m128i __B)112438fd1498Szrj _mm_mullo_epi16 (__m128i __A, __m128i __B)
112538fd1498Szrj {
112638fd1498Szrj   return (__m128i) ((__v8hu)__A * (__v8hu)__B);
112738fd1498Szrj }
112838fd1498Szrj 
112938fd1498Szrj extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_su32(__m64 __A,__m64 __B)113038fd1498Szrj _mm_mul_su32 (__m64 __A, __m64 __B)
113138fd1498Szrj {
113238fd1498Szrj   return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
113338fd1498Szrj }
113438fd1498Szrj 
113538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epu32(__m128i __A,__m128i __B)113638fd1498Szrj _mm_mul_epu32 (__m128i __A, __m128i __B)
113738fd1498Szrj {
113838fd1498Szrj   return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
113938fd1498Szrj }
114038fd1498Szrj 
114138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi16(__m128i __A,int __B)114238fd1498Szrj _mm_slli_epi16 (__m128i __A, int __B)
114338fd1498Szrj {
114438fd1498Szrj   return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
114538fd1498Szrj }
114638fd1498Szrj 
114738fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi32(__m128i __A,int __B)114838fd1498Szrj _mm_slli_epi32 (__m128i __A, int __B)
114938fd1498Szrj {
115038fd1498Szrj   return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
115138fd1498Szrj }
115238fd1498Szrj 
115338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi64(__m128i __A,int __B)115438fd1498Szrj _mm_slli_epi64 (__m128i __A, int __B)
115538fd1498Szrj {
115638fd1498Szrj   return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
115738fd1498Szrj }
115838fd1498Szrj 
115938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi16(__m128i __A,int __B)116038fd1498Szrj _mm_srai_epi16 (__m128i __A, int __B)
116138fd1498Szrj {
116238fd1498Szrj   return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
116338fd1498Szrj }
116438fd1498Szrj 
116538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi32(__m128i __A,int __B)116638fd1498Szrj _mm_srai_epi32 (__m128i __A, int __B)
116738fd1498Szrj {
116838fd1498Szrj   return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
116938fd1498Szrj }
117038fd1498Szrj 
/* Whole-register byte shifts.  The caller passes a byte count; each
   definition multiplies it by 8 before handing it to the builtin.  When
   optimizing they are inline functions, otherwise macros (presumably
   because the builtins need the count to be a compile-time constant).  */
#ifdef __OPTIMIZE__
/* Shift __A right by __N bytes (PSRLDQ).  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_bsrli_si128 (__m128i __A, const int __N)
{
  return (__m128i) __builtin_ia32_psrldqi128 (__A, __N * 8);
}

/* Shift __A left by __N bytes (PSLLDQ).  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_bslli_si128 (__m128i __A, const int __N)
{
  return (__m128i) __builtin_ia32_pslldqi128 (__A, __N * 8);
}

/* Same operation as _mm_bsrli_si128, under its other name.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i) __builtin_ia32_psrldqi128 (__A, __N * 8);
}

/* Same operation as _mm_bslli_si128, under its other name.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i) __builtin_ia32_pslldqi128 (__A, __N * 8);
}
#else
/* Macro forms of the four byte shifts above.  */
#define _mm_bsrli_si128(A, N)						\
  ((__m128i) __builtin_ia32_psrldqi128 ((__m128i) (A), (int) (N) * 8))
#define _mm_bslli_si128(A, N)						\
  ((__m128i) __builtin_ia32_pslldqi128 ((__m128i) (A), (int) (N) * 8))
#define _mm_srli_si128(A, N)						\
  ((__m128i) __builtin_ia32_psrldqi128 ((__m128i) (A), (int) (N) * 8))
#define _mm_slli_si128(A, N)						\
  ((__m128i) __builtin_ia32_pslldqi128 ((__m128i) (A), (int) (N) * 8))
#endif
120538fd1498Szrj 
120638fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi16(__m128i __A,int __B)120738fd1498Szrj _mm_srli_epi16 (__m128i __A, int __B)
120838fd1498Szrj {
120938fd1498Szrj   return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
121038fd1498Szrj }
121138fd1498Szrj 
121238fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi32(__m128i __A,int __B)121338fd1498Szrj _mm_srli_epi32 (__m128i __A, int __B)
121438fd1498Szrj {
121538fd1498Szrj   return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
121638fd1498Szrj }
121738fd1498Szrj 
121838fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi64(__m128i __A,int __B)121938fd1498Szrj _mm_srli_epi64 (__m128i __A, int __B)
122038fd1498Szrj {
122138fd1498Szrj   return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
122238fd1498Szrj }
122338fd1498Szrj 
122438fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi16(__m128i __A,__m128i __B)122538fd1498Szrj _mm_sll_epi16 (__m128i __A, __m128i __B)
122638fd1498Szrj {
122738fd1498Szrj   return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
122838fd1498Szrj }
122938fd1498Szrj 
123038fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi32(__m128i __A,__m128i __B)123138fd1498Szrj _mm_sll_epi32 (__m128i __A, __m128i __B)
123238fd1498Szrj {
123338fd1498Szrj   return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
123438fd1498Szrj }
123538fd1498Szrj 
123638fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi64(__m128i __A,__m128i __B)123738fd1498Szrj _mm_sll_epi64 (__m128i __A, __m128i __B)
123838fd1498Szrj {
123938fd1498Szrj   return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
124038fd1498Szrj }
124138fd1498Szrj 
124238fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi16(__m128i __A,__m128i __B)124338fd1498Szrj _mm_sra_epi16 (__m128i __A, __m128i __B)
124438fd1498Szrj {
124538fd1498Szrj   return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
124638fd1498Szrj }
124738fd1498Szrj 
124838fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi32(__m128i __A,__m128i __B)124938fd1498Szrj _mm_sra_epi32 (__m128i __A, __m128i __B)
125038fd1498Szrj {
125138fd1498Szrj   return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
125238fd1498Szrj }
125338fd1498Szrj 
125438fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi16(__m128i __A,__m128i __B)125538fd1498Szrj _mm_srl_epi16 (__m128i __A, __m128i __B)
125638fd1498Szrj {
125738fd1498Szrj   return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
125838fd1498Szrj }
125938fd1498Szrj 
126038fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi32(__m128i __A,__m128i __B)126138fd1498Szrj _mm_srl_epi32 (__m128i __A, __m128i __B)
126238fd1498Szrj {
126338fd1498Szrj   return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
126438fd1498Szrj }
126538fd1498Szrj 
126638fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi64(__m128i __A,__m128i __B)126738fd1498Szrj _mm_srl_epi64 (__m128i __A, __m128i __B)
126838fd1498Szrj {
126938fd1498Szrj   return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
127038fd1498Szrj }
127138fd1498Szrj 
127238fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si128(__m128i __A,__m128i __B)127338fd1498Szrj _mm_and_si128 (__m128i __A, __m128i __B)
127438fd1498Szrj {
127538fd1498Szrj   return (__m128i) ((__v2du)__A & (__v2du)__B);
127638fd1498Szrj }
127738fd1498Szrj 
127838fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si128(__m128i __A,__m128i __B)127938fd1498Szrj _mm_andnot_si128 (__m128i __A, __m128i __B)
128038fd1498Szrj {
128138fd1498Szrj   return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
128238fd1498Szrj }
128338fd1498Szrj 
128438fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si128(__m128i __A,__m128i __B)128538fd1498Szrj _mm_or_si128 (__m128i __A, __m128i __B)
128638fd1498Szrj {
128738fd1498Szrj   return (__m128i) ((__v2du)__A | (__v2du)__B);
128838fd1498Szrj }
128938fd1498Szrj 
129038fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si128(__m128i __A,__m128i __B)129138fd1498Szrj _mm_xor_si128 (__m128i __A, __m128i __B)
129238fd1498Szrj {
129338fd1498Szrj   return (__m128i) ((__v2du)__A ^ (__v2du)__B);
129438fd1498Szrj }
129538fd1498Szrj 
129638fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi8(__m128i __A,__m128i __B)129738fd1498Szrj _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
129838fd1498Szrj {
1299*58e805e6Szrj   return (__m128i) ((__v16qs)__A == (__v16qs)__B);
130038fd1498Szrj }
130138fd1498Szrj 
130238fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi16(__m128i __A,__m128i __B)130338fd1498Szrj _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
130438fd1498Szrj {
130538fd1498Szrj   return (__m128i) ((__v8hi)__A == (__v8hi)__B);
130638fd1498Szrj }
130738fd1498Szrj 
130838fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi32(__m128i __A,__m128i __B)130938fd1498Szrj _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
131038fd1498Szrj {
131138fd1498Szrj   return (__m128i) ((__v4si)__A == (__v4si)__B);
131238fd1498Szrj }
131338fd1498Szrj 
131438fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi8(__m128i __A,__m128i __B)131538fd1498Szrj _mm_cmplt_epi8 (__m128i __A, __m128i __B)
131638fd1498Szrj {
1317*58e805e6Szrj   return (__m128i) ((__v16qs)__A < (__v16qs)__B);
131838fd1498Szrj }
131938fd1498Szrj 
132038fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi16(__m128i __A,__m128i __B)132138fd1498Szrj _mm_cmplt_epi16 (__m128i __A, __m128i __B)
132238fd1498Szrj {
132338fd1498Szrj   return (__m128i) ((__v8hi)__A < (__v8hi)__B);
132438fd1498Szrj }
132538fd1498Szrj 
132638fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi32(__m128i __A,__m128i __B)132738fd1498Szrj _mm_cmplt_epi32 (__m128i __A, __m128i __B)
132838fd1498Szrj {
132938fd1498Szrj   return (__m128i) ((__v4si)__A < (__v4si)__B);
133038fd1498Szrj }
133138fd1498Szrj 
133238fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi8(__m128i __A,__m128i __B)133338fd1498Szrj _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
133438fd1498Szrj {
1335*58e805e6Szrj   return (__m128i) ((__v16qs)__A > (__v16qs)__B);
133638fd1498Szrj }
133738fd1498Szrj 
133838fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi16(__m128i __A,__m128i __B)133938fd1498Szrj _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
134038fd1498Szrj {
134138fd1498Szrj   return (__m128i) ((__v8hi)__A > (__v8hi)__B);
134238fd1498Szrj }
134338fd1498Szrj 
134438fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi32(__m128i __A,__m128i __B)134538fd1498Szrj _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
134638fd1498Szrj {
134738fd1498Szrj   return (__m128i) ((__v4si)__A > (__v4si)__B);
134838fd1498Szrj }
134938fd1498Szrj 
/* Access to a single 16-bit lane, selected by index __N.  Inline
   functions when optimizing, macros otherwise.  */
#ifdef __OPTIMIZE__
/* Read lane __N of __A.  The value passes through `unsigned short', so
   it reaches the int result zero-extended.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  __v8hi __lanes = (__v8hi) __A;

  return (unsigned short) __builtin_ia32_vec_ext_v8hi (__lanes, __N);
}

/* Return a copy of __A in which lane __N has been replaced by __D.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  __v8hi __lanes = (__v8hi) __A;

  return (__m128i) __builtin_ia32_vec_set_v8hi (__lanes, __D, __N);
}
#else
/* Macro forms of the two lane accessors above.  */
#define _mm_extract_epi16(A, N)						\
  ((int) (unsigned short)						\
   __builtin_ia32_vec_ext_v8hi ((__v8hi) (__m128i) (A), (int) (N)))
#define _mm_insert_epi16(A, D, N)					\
  ((__m128i)								\
   __builtin_ia32_vec_set_v8hi ((__v8hi) (__m128i) (A), (int) (D), (int) (N)))
#endif
136938fd1498Szrj 
137038fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi16(__m128i __A,__m128i __B)137138fd1498Szrj _mm_max_epi16 (__m128i __A, __m128i __B)
137238fd1498Szrj {
137338fd1498Szrj   return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
137438fd1498Szrj }
137538fd1498Szrj 
137638fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu8(__m128i __A,__m128i __B)137738fd1498Szrj _mm_max_epu8 (__m128i __A, __m128i __B)
137838fd1498Szrj {
137938fd1498Szrj   return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
138038fd1498Szrj }
138138fd1498Szrj 
138238fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi16(__m128i __A,__m128i __B)138338fd1498Szrj _mm_min_epi16 (__m128i __A, __m128i __B)
138438fd1498Szrj {
138538fd1498Szrj   return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
138638fd1498Szrj }
138738fd1498Szrj 
138838fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu8(__m128i __A,__m128i __B)138938fd1498Szrj _mm_min_epu8 (__m128i __A, __m128i __B)
139038fd1498Szrj {
139138fd1498Szrj   return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
139238fd1498Szrj }
139338fd1498Szrj 
139438fd1498Szrj extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_epi8(__m128i __A)139538fd1498Szrj _mm_movemask_epi8 (__m128i __A)
139638fd1498Szrj {
139738fd1498Szrj   return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
139838fd1498Szrj }
139938fd1498Szrj 
140038fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_epu16(__m128i __A,__m128i __B)140138fd1498Szrj _mm_mulhi_epu16 (__m128i __A, __m128i __B)
140238fd1498Szrj {
140338fd1498Szrj   return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
140438fd1498Szrj }
140538fd1498Szrj 
/* Lane shuffles driven by a selector mask.  Inline functions when
   optimizing, macros otherwise.  */
#ifdef __OPTIMIZE__
/* Shuffle 16-bit lanes of __A as directed by __mask (PSHUFHW, which
   permutes the upper four lanes).  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  __v8hi __lanes = (__v8hi) __A;

  return (__m128i) __builtin_ia32_pshufhw (__lanes, __mask);
}

/* Shuffle 16-bit lanes of __A as directed by __mask (PSHUFLW, which
   permutes the lower four lanes).  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  __v8hi __lanes = (__v8hi) __A;

  return (__m128i) __builtin_ia32_pshuflw (__lanes, __mask);
}

/* Shuffle the four 32-bit lanes of __A as directed by __mask
   (PSHUFD).  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  __v4si __lanes = (__v4si) __A;

  return (__m128i) __builtin_ia32_pshufd (__lanes, __mask);
}
#else
/* Macro forms of the three shuffles above.  */
#define _mm_shufflehi_epi16(A, N)					\
  ((__m128i) __builtin_ia32_pshufhw ((__v8hi) (__m128i) (A), (int) (N)))
#define _mm_shufflelo_epi16(A, N)					\
  ((__m128i) __builtin_ia32_pshuflw ((__v8hi) (__m128i) (A), (int) (N)))
#define _mm_shuffle_epi32(A, N)						\
  ((__m128i) __builtin_ia32_pshufd ((__v4si) (__m128i) (A), (int) (N)))
#endif
143238fd1498Szrj 
143338fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskmoveu_si128(__m128i __A,__m128i __B,char * __C)143438fd1498Szrj _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
143538fd1498Szrj {
143638fd1498Szrj   __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
143738fd1498Szrj }
143838fd1498Szrj 
143938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu8(__m128i __A,__m128i __B)144038fd1498Szrj _mm_avg_epu8 (__m128i __A, __m128i __B)
144138fd1498Szrj {
144238fd1498Szrj   return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
144338fd1498Szrj }
144438fd1498Szrj 
144538fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu16(__m128i __A,__m128i __B)144638fd1498Szrj _mm_avg_epu16 (__m128i __A, __m128i __B)
144738fd1498Szrj {
144838fd1498Szrj   return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
144938fd1498Szrj }
145038fd1498Szrj 
145138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sad_epu8(__m128i __A,__m128i __B)145238fd1498Szrj _mm_sad_epu8 (__m128i __A, __m128i __B)
145338fd1498Szrj {
145438fd1498Szrj   return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
145538fd1498Szrj }
145638fd1498Szrj 
/* Store the 32-bit value __B to *__A through the MOVNTI builtin
   (non-temporal store).  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
  __builtin_ia32_movnti (__A, __B);
}
146238fd1498Szrj 
146338fd1498Szrj #ifdef __x86_64__
/* Store the 64-bit value __B to *__A through the MOVNTI builtin
   (non-temporal store, 64-bit form).  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si64 (long long int *__A, long long int __B)
{
  __builtin_ia32_movnti64 (__A, __B);
}
146938fd1498Szrj #endif
147038fd1498Szrj 
147138fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si128(__m128i * __A,__m128i __B)147238fd1498Szrj _mm_stream_si128 (__m128i *__A, __m128i __B)
147338fd1498Szrj {
147438fd1498Szrj   __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
147538fd1498Szrj }
147638fd1498Szrj 
147738fd1498Szrj extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_pd(double * __A,__m128d __B)147838fd1498Szrj _mm_stream_pd (double *__A, __m128d __B)
147938fd1498Szrj {
148038fd1498Szrj   __builtin_ia32_movntpd (__A, (__v2df)__B);
148138fd1498Szrj }
148238fd1498Szrj 
/* Issue CLFLUSH for the cache line that contains address __A.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
}
148838fd1498Szrj 
/* Issue an LFENCE (load fence) instruction.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}
149438fd1498Szrj 
/* Issue an MFENCE (full memory fence) instruction.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}
150038fd1498Szrj 
150138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si128(int __A)150238fd1498Szrj _mm_cvtsi32_si128 (int __A)
150338fd1498Szrj {
150438fd1498Szrj   return _mm_set_epi32 (0, 0, 0, __A);
150538fd1498Szrj }
150638fd1498Szrj 
150738fd1498Szrj #ifdef __x86_64__
150838fd1498Szrj /* Intel intrinsic.  */
150938fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si128(long long __A)151038fd1498Szrj _mm_cvtsi64_si128 (long long __A)
151138fd1498Szrj {
151238fd1498Szrj   return _mm_set_epi64x (0, __A);
151338fd1498Szrj }
151438fd1498Szrj 
151538fd1498Szrj /* Microsoft intrinsic.  */
151638fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si128(long long __A)151738fd1498Szrj _mm_cvtsi64x_si128 (long long __A)
151838fd1498Szrj {
151938fd1498Szrj   return _mm_set_epi64x (0, __A);
152038fd1498Szrj }
152138fd1498Szrj #endif
152238fd1498Szrj 
/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type.  */
152538fd1498Szrj extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_ps(__m128d __A)152638fd1498Szrj _mm_castpd_ps(__m128d __A)
152738fd1498Szrj {
152838fd1498Szrj   return (__m128) __A;
152938fd1498Szrj }
153038fd1498Szrj 
153138fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_si128(__m128d __A)153238fd1498Szrj _mm_castpd_si128(__m128d __A)
153338fd1498Szrj {
153438fd1498Szrj   return (__m128i) __A;
153538fd1498Szrj }
153638fd1498Szrj 
153738fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_pd(__m128 __A)153838fd1498Szrj _mm_castps_pd(__m128 __A)
153938fd1498Szrj {
154038fd1498Szrj   return (__m128d) __A;
154138fd1498Szrj }
154238fd1498Szrj 
154338fd1498Szrj extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_si128(__m128 __A)154438fd1498Szrj _mm_castps_si128(__m128 __A)
154538fd1498Szrj {
154638fd1498Szrj   return (__m128i) __A;
154738fd1498Szrj }
154838fd1498Szrj 
154938fd1498Szrj extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_ps(__m128i __A)155038fd1498Szrj _mm_castsi128_ps(__m128i __A)
155138fd1498Szrj {
155238fd1498Szrj   return (__m128) __A;
155338fd1498Szrj }
155438fd1498Szrj 
155538fd1498Szrj extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_pd(__m128i __A)155638fd1498Szrj _mm_castsi128_pd(__m128i __A)
155738fd1498Szrj {
155838fd1498Szrj   return (__m128d) __A;
155938fd1498Szrj }
156038fd1498Szrj 
156138fd1498Szrj #ifdef __DISABLE_SSE2__
156238fd1498Szrj #undef __DISABLE_SSE2__
156338fd1498Szrj #pragma GCC pop_options
156438fd1498Szrj #endif /* __DISABLE_SSE2__ */
156538fd1498Szrj 
156638fd1498Szrj #endif /* _EMMINTRIN_H_INCLUDED */
1567