/* xref: /dflybsd-src/contrib/gcc-8.0/gcc/config/i386/pmmintrin.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523) */
/* Copyright (C) 2003-2018 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

27*38fd1498Szrj #ifndef _PMMINTRIN_H_INCLUDED
28*38fd1498Szrj #define _PMMINTRIN_H_INCLUDED
29*38fd1498Szrj 
30*38fd1498Szrj /* We need definitions from the SSE2 and SSE header files*/
31*38fd1498Szrj #include <emmintrin.h>
32*38fd1498Szrj 
33*38fd1498Szrj #ifndef __SSE3__
34*38fd1498Szrj #pragma GCC push_options
35*38fd1498Szrj #pragma GCC target("sse3")
36*38fd1498Szrj #define __DISABLE_SSE3__
37*38fd1498Szrj #endif /* __SSE3__ */
38*38fd1498Szrj 
/* Additional bits in the MXCSR.  */
#define _MM_DENORMALS_ZERO_MASK		0x0040
#define _MM_DENORMALS_ZERO_ON		0x0040
#define _MM_DENORMALS_ZERO_OFF		0x0000

/* Write the MXCSR back with its denormals-are-zero bit replaced by MODE
   (one of _MM_DENORMALS_ZERO_ON / _MM_DENORMALS_ZERO_OFF); every other
   MXCSR bit keeps the value just read by _mm_getcsr.  */
#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
  _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
/* Read the MXCSR and keep only the denormals-are-zero bit, so the result
   compares equal to _MM_DENORMALS_ZERO_ON or _MM_DENORMALS_ZERO_OFF.  */
#define _MM_GET_DENORMALS_ZERO_MODE() \
  (_mm_getcsr () & _MM_DENORMALS_ZERO_MASK)

/* Alternately subtract and add packed single-precision elements
   (ADDSUBPS): even-indexed elements of the result are __X - __Y,
   odd-indexed elements are __X + __Y.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_ps (__m128 __X, __m128 __Y)
{
  return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
}

/* Horizontal add of packed single-precision elements (HADDPS): the
   result is { X0+X1, X2+X3, Y0+Y1, Y2+Y3 }.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_ps (__m128 __X, __m128 __Y)
{
  return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
}

/* Horizontal subtract of packed single-precision elements (HSUBPS): the
   result is { X0-X1, X2-X3, Y0-Y1, Y2-Y3 }.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_ps (__m128 __X, __m128 __Y)
{
  return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
}

/* Duplicate the odd-indexed single-precision elements of __X
   (MOVSHDUP): the result is { X1, X1, X3, X3 }.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movehdup_ps (__m128 __X)
{
  return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
}

/* Duplicate the even-indexed single-precision elements of __X
   (MOVSLDUP): the result is { X0, X0, X2, X2 }.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_moveldup_ps (__m128 __X)
{
  return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
}

/* Alternately subtract and add packed double-precision elements
   (ADDSUBPD): the result is { X0-Y0, X1+Y1 }.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_addsub_pd (__m128d __X, __m128d __Y)
{
  return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
}

/* Horizontal add of packed double-precision elements (HADDPD): the
   result is { X0+X1, Y0+Y1 }.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pd (__m128d __X, __m128d __Y)
{
  return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
}

/* Horizontal subtract of packed double-precision elements (HSUBPD): the
   result is { X0-X1, Y0-Y1 }.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pd (__m128d __X, __m128d __Y)
{
  return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
}

/* Load the double at __P and replicate it into both elements of the
   result.  Implemented in terms of the SSE2 _mm_load1_pd.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loaddup_pd (double const *__P)
{
  return _mm_load1_pd (__P);
}

/* Replicate the low double of __X into both elements of the result.
   Implemented as a shuffle of __X with itself whose selector,
   _MM_SHUFFLE2 (0,0), picks element 0 for both positions.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movedup_pd (__m128d __X)
{
  return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
}

/* Load 128 bits of integer data from __P using LDDQU.  The pointer is
   handed to the builtin as a plain char pointer; the address does not
   have to be 16-byte aligned.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lddqu_si128 (__m128i const *__P)
{
  return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
}

/* Issue MONITOR through __builtin_ia32_monitor.  The address __P and the
   values __E and __H are forwarded to the builtin unchanged.
   NOTE(review): __E and __H are presumably the instruction's extensions
   and hints operands -- confirm against the Intel SDM.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
{
  __builtin_ia32_monitor (__P, __E, __H);
}

/* Issue MWAIT through __builtin_ia32_mwait.  The values __E and __H are
   forwarded to the builtin unchanged.
   NOTE(review): __E and __H are presumably the instruction's extensions
   and hints operands -- confirm against the Intel SDM.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mwait (unsigned int __E, unsigned int __H)
{
  __builtin_ia32_mwait (__E, __H);
}

127*38fd1498Szrj #ifdef __DISABLE_SSE3__
128*38fd1498Szrj #undef __DISABLE_SSE3__
129*38fd1498Szrj #pragma GCC pop_options
130*38fd1498Szrj #endif /* __DISABLE_SSE3__ */
131*38fd1498Szrj 
132*38fd1498Szrj #endif /* _PMMINTRIN_H_INCLUDED */
133