/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
24*e4b17023SJohn Marino
/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */
27*e4b17023SJohn Marino
28*e4b17023SJohn Marino #ifndef _PMMINTRIN_H_INCLUDED
29*e4b17023SJohn Marino #define _PMMINTRIN_H_INCLUDED
30*e4b17023SJohn Marino
31*e4b17023SJohn Marino #ifndef __SSE3__
32*e4b17023SJohn Marino # error "SSE3 instruction set not enabled"
33*e4b17023SJohn Marino #else
34*e4b17023SJohn Marino
35*e4b17023SJohn Marino /* We need definitions from the SSE2 and SSE header files*/
36*e4b17023SJohn Marino #include <emmintrin.h>
37*e4b17023SJohn Marino
/* Additional bits in the MXCSR.  The denormals-zero field is the single
   bit 0x0040; _ON sets that bit and _OFF clears it.  */
#define _MM_DENORMALS_ZERO_MASK	0x0040
#define _MM_DENORMALS_ZERO_ON	0x0040
#define _MM_DENORMALS_ZERO_OFF	0x0000

/* Read the control/status word with _mm_getcsr, clear the bits selected by
   _MM_DENORMALS_ZERO_MASK, OR in MODE and write the result back with
   _mm_setcsr.  MODE is not masked, so it should be _MM_DENORMALS_ZERO_ON
   or _MM_DENORMALS_ZERO_OFF; any other bits set in MODE are written too.  */
#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
  _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))

/* Yield the bits of the control/status word selected by
   _MM_DENORMALS_ZERO_MASK, i.e. _MM_DENORMALS_ZERO_ON or
   _MM_DENORMALS_ZERO_OFF.  */
#define _MM_GET_DENORMALS_ZERO_MODE() \
  (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
47*e4b17023SJohn Marino
48*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_ps(__m128 __X,__m128 __Y)49*e4b17023SJohn Marino _mm_addsub_ps (__m128 __X, __m128 __Y)
50*e4b17023SJohn Marino {
51*e4b17023SJohn Marino return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
52*e4b17023SJohn Marino }
53*e4b17023SJohn Marino
54*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_ps(__m128 __X,__m128 __Y)55*e4b17023SJohn Marino _mm_hadd_ps (__m128 __X, __m128 __Y)
56*e4b17023SJohn Marino {
57*e4b17023SJohn Marino return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
58*e4b17023SJohn Marino }
59*e4b17023SJohn Marino
60*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_ps(__m128 __X,__m128 __Y)61*e4b17023SJohn Marino _mm_hsub_ps (__m128 __X, __m128 __Y)
62*e4b17023SJohn Marino {
63*e4b17023SJohn Marino return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
64*e4b17023SJohn Marino }
65*e4b17023SJohn Marino
66*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehdup_ps(__m128 __X)67*e4b17023SJohn Marino _mm_movehdup_ps (__m128 __X)
68*e4b17023SJohn Marino {
69*e4b17023SJohn Marino return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
70*e4b17023SJohn Marino }
71*e4b17023SJohn Marino
72*e4b17023SJohn Marino extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_moveldup_ps(__m128 __X)73*e4b17023SJohn Marino _mm_moveldup_ps (__m128 __X)
74*e4b17023SJohn Marino {
75*e4b17023SJohn Marino return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
76*e4b17023SJohn Marino }
77*e4b17023SJohn Marino
78*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_pd(__m128d __X,__m128d __Y)79*e4b17023SJohn Marino _mm_addsub_pd (__m128d __X, __m128d __Y)
80*e4b17023SJohn Marino {
81*e4b17023SJohn Marino return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
82*e4b17023SJohn Marino }
83*e4b17023SJohn Marino
84*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pd(__m128d __X,__m128d __Y)85*e4b17023SJohn Marino _mm_hadd_pd (__m128d __X, __m128d __Y)
86*e4b17023SJohn Marino {
87*e4b17023SJohn Marino return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
88*e4b17023SJohn Marino }
89*e4b17023SJohn Marino
90*e4b17023SJohn Marino extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pd(__m128d __X,__m128d __Y)91*e4b17023SJohn Marino _mm_hsub_pd (__m128d __X, __m128d __Y)
92*e4b17023SJohn Marino {
93*e4b17023SJohn Marino return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
94*e4b17023SJohn Marino }
95*e4b17023SJohn Marino
/* Load the double that __P points to and return it in both elements of an
   __m128d.  Implemented by forwarding to the SSE2 intrinsic _mm_load1_pd,
   so __P must point to a readable double.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loaddup_pd (double const *__P)
{
  return _mm_load1_pd (__P);
}
101*e4b17023SJohn Marino
/* Return an __m128d whose two elements both hold the low element of __X.
   Implemented as an SSE2 shuffle of __X with itself; the selector
   _MM_SHUFFLE2 (0, 0) picks element 0 for both result positions.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movedup_pd (__m128d __X)
{
  return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
}
107*e4b17023SJohn Marino
108*e4b17023SJohn Marino extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lddqu_si128(__m128i const * __P)109*e4b17023SJohn Marino _mm_lddqu_si128 (__m128i const *__P)
110*e4b17023SJohn Marino {
111*e4b17023SJohn Marino return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
112*e4b17023SJohn Marino }
113*e4b17023SJohn Marino
/* Forward the address __P and the words __E and __H, unchanged and in that
   order, to the monitor builtin.  Nothing is returned.
   NOTE(review): presumably __E carries the MONITOR extensions and __H the
   hints, and the instruction may be privileged -- confirm against the
   Intel reference before calling from user code.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_monitor (void const *__P, unsigned int __E, unsigned int __H)
{
  __builtin_ia32_monitor (__P, __E, __H);
}
119*e4b17023SJohn Marino
/* Forward the words __E and __H, unchanged and in that order, to the mwait
   builtin.  Nothing is returned.
   NOTE(review): presumably __E carries the MWAIT extensions and __H the
   hints, and the instruction may be privileged -- confirm against the
   Intel reference before calling from user code.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mwait (unsigned int __E, unsigned int __H)
{
  __builtin_ia32_mwait (__E, __H);
}
125*e4b17023SJohn Marino
126*e4b17023SJohn Marino #endif /* __SSE3__ */
127*e4b17023SJohn Marino
128*e4b17023SJohn Marino #endif /* _PMMINTRIN_H_INCLUDED */
129