/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

10e5dd7070Spatrick #ifndef __PMMINTRIN_H
11e5dd7070Spatrick #define __PMMINTRIN_H
12e5dd7070Spatrick
13*12c85518Srobert #if !defined(__i386__) && !defined(__x86_64__)
14*12c85518Srobert #error "This header is only meant to be used on x86 and x64 architecture"
15*12c85518Srobert #endif
16*12c85518Srobert
17e5dd7070Spatrick #include <emmintrin.h>
18e5dd7070Spatrick
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))

23e5dd7070Spatrick /// Loads data from an unaligned memory location to elements in a 128-bit
24e5dd7070Spatrick /// vector.
25e5dd7070Spatrick ///
26e5dd7070Spatrick /// If the address of the data is not 16-byte aligned, the instruction may
27e5dd7070Spatrick /// read two adjacent aligned blocks of memory to retrieve the requested
28e5dd7070Spatrick /// data.
29e5dd7070Spatrick ///
30e5dd7070Spatrick /// \headerfile <x86intrin.h>
31e5dd7070Spatrick ///
32e5dd7070Spatrick /// This intrinsic corresponds to the <c> VLDDQU </c> instruction.
33e5dd7070Spatrick ///
34e5dd7070Spatrick /// \param __p
35e5dd7070Spatrick /// A pointer to a 128-bit integer vector containing integer values.
36e5dd7070Spatrick /// \returns A 128-bit vector containing the moved values.
37e5dd7070Spatrick static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_lddqu_si128(__m128i_u const * __p)38*12c85518Srobert _mm_lddqu_si128(__m128i_u const *__p)
39e5dd7070Spatrick {
40e5dd7070Spatrick return (__m128i)__builtin_ia32_lddqu((char const *)__p);
41e5dd7070Spatrick }
42e5dd7070Spatrick
43e5dd7070Spatrick /// Adds the even-indexed values and subtracts the odd-indexed values of
44e5dd7070Spatrick /// two 128-bit vectors of [4 x float].
45e5dd7070Spatrick ///
46e5dd7070Spatrick /// \headerfile <x86intrin.h>
47e5dd7070Spatrick ///
48e5dd7070Spatrick /// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.
49e5dd7070Spatrick ///
50e5dd7070Spatrick /// \param __a
51e5dd7070Spatrick /// A 128-bit vector of [4 x float] containing the left source operand.
52e5dd7070Spatrick /// \param __b
53e5dd7070Spatrick /// A 128-bit vector of [4 x float] containing the right source operand.
54e5dd7070Spatrick /// \returns A 128-bit vector of [4 x float] containing the alternating sums and
55e5dd7070Spatrick /// differences of both operands.
56e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_addsub_ps(__m128 __a,__m128 __b)57e5dd7070Spatrick _mm_addsub_ps(__m128 __a, __m128 __b)
58e5dd7070Spatrick {
59e5dd7070Spatrick return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
60e5dd7070Spatrick }
61e5dd7070Spatrick
62e5dd7070Spatrick /// Horizontally adds the adjacent pairs of values contained in two
63e5dd7070Spatrick /// 128-bit vectors of [4 x float].
64e5dd7070Spatrick ///
65e5dd7070Spatrick /// \headerfile <x86intrin.h>
66e5dd7070Spatrick ///
67e5dd7070Spatrick /// This intrinsic corresponds to the <c> VHADDPS </c> instruction.
68e5dd7070Spatrick ///
69e5dd7070Spatrick /// \param __a
70e5dd7070Spatrick /// A 128-bit vector of [4 x float] containing one of the source operands.
71e5dd7070Spatrick /// The horizontal sums of the values are stored in the lower bits of the
72e5dd7070Spatrick /// destination.
73e5dd7070Spatrick /// \param __b
74e5dd7070Spatrick /// A 128-bit vector of [4 x float] containing one of the source operands.
75e5dd7070Spatrick /// The horizontal sums of the values are stored in the upper bits of the
76e5dd7070Spatrick /// destination.
77e5dd7070Spatrick /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
78e5dd7070Spatrick /// both operands.
79e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_hadd_ps(__m128 __a,__m128 __b)80e5dd7070Spatrick _mm_hadd_ps(__m128 __a, __m128 __b)
81e5dd7070Spatrick {
82e5dd7070Spatrick return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
83e5dd7070Spatrick }
84e5dd7070Spatrick
85e5dd7070Spatrick /// Horizontally subtracts the adjacent pairs of values contained in two
86e5dd7070Spatrick /// 128-bit vectors of [4 x float].
87e5dd7070Spatrick ///
88e5dd7070Spatrick /// \headerfile <x86intrin.h>
89e5dd7070Spatrick ///
90e5dd7070Spatrick /// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.
91e5dd7070Spatrick ///
92e5dd7070Spatrick /// \param __a
93e5dd7070Spatrick /// A 128-bit vector of [4 x float] containing one of the source operands.
94e5dd7070Spatrick /// The horizontal differences between the values are stored in the lower
95e5dd7070Spatrick /// bits of the destination.
96e5dd7070Spatrick /// \param __b
97e5dd7070Spatrick /// A 128-bit vector of [4 x float] containing one of the source operands.
98e5dd7070Spatrick /// The horizontal differences between the values are stored in the upper
99e5dd7070Spatrick /// bits of the destination.
100e5dd7070Spatrick /// \returns A 128-bit vector of [4 x float] containing the horizontal
101e5dd7070Spatrick /// differences of both operands.
102e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_hsub_ps(__m128 __a,__m128 __b)103e5dd7070Spatrick _mm_hsub_ps(__m128 __a, __m128 __b)
104e5dd7070Spatrick {
105e5dd7070Spatrick return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
106e5dd7070Spatrick }
107e5dd7070Spatrick
108e5dd7070Spatrick /// Moves and duplicates odd-indexed values from a 128-bit vector
109e5dd7070Spatrick /// of [4 x float] to float values stored in a 128-bit vector of
110e5dd7070Spatrick /// [4 x float].
111e5dd7070Spatrick ///
112e5dd7070Spatrick /// \headerfile <x86intrin.h>
113e5dd7070Spatrick ///
114e5dd7070Spatrick /// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
115e5dd7070Spatrick ///
116e5dd7070Spatrick /// \param __a
117e5dd7070Spatrick /// A 128-bit vector of [4 x float]. \n
118e5dd7070Spatrick /// Bits [127:96] of the source are written to bits [127:96] and [95:64] of
119e5dd7070Spatrick /// the destination. \n
120e5dd7070Spatrick /// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
121e5dd7070Spatrick /// destination.
122e5dd7070Spatrick /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
123e5dd7070Spatrick /// values.
124e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_movehdup_ps(__m128 __a)125e5dd7070Spatrick _mm_movehdup_ps(__m128 __a)
126e5dd7070Spatrick {
127e5dd7070Spatrick return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
128e5dd7070Spatrick }
129e5dd7070Spatrick
130e5dd7070Spatrick /// Duplicates even-indexed values from a 128-bit vector of
131e5dd7070Spatrick /// [4 x float] to float values stored in a 128-bit vector of [4 x float].
132e5dd7070Spatrick ///
133e5dd7070Spatrick /// \headerfile <x86intrin.h>
134e5dd7070Spatrick ///
135e5dd7070Spatrick /// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
136e5dd7070Spatrick ///
137e5dd7070Spatrick /// \param __a
138e5dd7070Spatrick /// A 128-bit vector of [4 x float] \n
139e5dd7070Spatrick /// Bits [95:64] of the source are written to bits [127:96] and [95:64] of
140e5dd7070Spatrick /// the destination. \n
141e5dd7070Spatrick /// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
142e5dd7070Spatrick /// destination.
143e5dd7070Spatrick /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
144e5dd7070Spatrick /// values.
145e5dd7070Spatrick static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_moveldup_ps(__m128 __a)146e5dd7070Spatrick _mm_moveldup_ps(__m128 __a)
147e5dd7070Spatrick {
148e5dd7070Spatrick return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
149e5dd7070Spatrick }
150e5dd7070Spatrick
151e5dd7070Spatrick /// Adds the even-indexed values and subtracts the odd-indexed values of
152e5dd7070Spatrick /// two 128-bit vectors of [2 x double].
153e5dd7070Spatrick ///
154e5dd7070Spatrick /// \headerfile <x86intrin.h>
155e5dd7070Spatrick ///
156e5dd7070Spatrick /// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.
157e5dd7070Spatrick ///
158e5dd7070Spatrick /// \param __a
159e5dd7070Spatrick /// A 128-bit vector of [2 x double] containing the left source operand.
160e5dd7070Spatrick /// \param __b
161e5dd7070Spatrick /// A 128-bit vector of [2 x double] containing the right source operand.
162e5dd7070Spatrick /// \returns A 128-bit vector of [2 x double] containing the alternating sums
163e5dd7070Spatrick /// and differences of both operands.
164e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_addsub_pd(__m128d __a,__m128d __b)165e5dd7070Spatrick _mm_addsub_pd(__m128d __a, __m128d __b)
166e5dd7070Spatrick {
167e5dd7070Spatrick return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
168e5dd7070Spatrick }
169e5dd7070Spatrick
170e5dd7070Spatrick /// Horizontally adds the pairs of values contained in two 128-bit
171e5dd7070Spatrick /// vectors of [2 x double].
172e5dd7070Spatrick ///
173e5dd7070Spatrick /// \headerfile <x86intrin.h>
174e5dd7070Spatrick ///
175e5dd7070Spatrick /// This intrinsic corresponds to the <c> VHADDPD </c> instruction.
176e5dd7070Spatrick ///
177e5dd7070Spatrick /// \param __a
178e5dd7070Spatrick /// A 128-bit vector of [2 x double] containing one of the source operands.
179e5dd7070Spatrick /// The horizontal sum of the values is stored in the lower bits of the
180e5dd7070Spatrick /// destination.
181e5dd7070Spatrick /// \param __b
182e5dd7070Spatrick /// A 128-bit vector of [2 x double] containing one of the source operands.
183e5dd7070Spatrick /// The horizontal sum of the values is stored in the upper bits of the
184e5dd7070Spatrick /// destination.
185e5dd7070Spatrick /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
186e5dd7070Spatrick /// both operands.
187e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_hadd_pd(__m128d __a,__m128d __b)188e5dd7070Spatrick _mm_hadd_pd(__m128d __a, __m128d __b)
189e5dd7070Spatrick {
190e5dd7070Spatrick return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
191e5dd7070Spatrick }
192e5dd7070Spatrick
193e5dd7070Spatrick /// Horizontally subtracts the pairs of values contained in two 128-bit
194e5dd7070Spatrick /// vectors of [2 x double].
195e5dd7070Spatrick ///
196e5dd7070Spatrick /// \headerfile <x86intrin.h>
197e5dd7070Spatrick ///
198e5dd7070Spatrick /// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.
199e5dd7070Spatrick ///
200e5dd7070Spatrick /// \param __a
201e5dd7070Spatrick /// A 128-bit vector of [2 x double] containing one of the source operands.
202e5dd7070Spatrick /// The horizontal difference of the values is stored in the lower bits of
203e5dd7070Spatrick /// the destination.
204e5dd7070Spatrick /// \param __b
205e5dd7070Spatrick /// A 128-bit vector of [2 x double] containing one of the source operands.
206e5dd7070Spatrick /// The horizontal difference of the values is stored in the upper bits of
207e5dd7070Spatrick /// the destination.
208e5dd7070Spatrick /// \returns A 128-bit vector of [2 x double] containing the horizontal
209e5dd7070Spatrick /// differences of both operands.
210e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_hsub_pd(__m128d __a,__m128d __b)211e5dd7070Spatrick _mm_hsub_pd(__m128d __a, __m128d __b)
212e5dd7070Spatrick {
213e5dd7070Spatrick return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
214e5dd7070Spatrick }
215e5dd7070Spatrick
/// Moves and duplicates one double-precision value to double-precision
///    values stored in a 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_loaddup_pd(double const *dp);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
///
/// \param dp
///    A pointer to a double-precision value to be moved and duplicated.
/// \returns A 128-bit vector of [2 x double] containing the moved and
///    duplicated values.
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)

233e5dd7070Spatrick /// Moves and duplicates the double-precision value in the lower bits of
234e5dd7070Spatrick /// a 128-bit vector of [2 x double] to double-precision values stored in a
235e5dd7070Spatrick /// 128-bit vector of [2 x double].
236e5dd7070Spatrick ///
237e5dd7070Spatrick /// \headerfile <x86intrin.h>
238e5dd7070Spatrick ///
239e5dd7070Spatrick /// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
240e5dd7070Spatrick ///
241e5dd7070Spatrick /// \param __a
242e5dd7070Spatrick /// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits
243e5dd7070Spatrick /// [127:64] and [63:0] of the destination.
244e5dd7070Spatrick /// \returns A 128-bit vector of [2 x double] containing the moved and
245e5dd7070Spatrick /// duplicated values.
246e5dd7070Spatrick static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_movedup_pd(__m128d __a)247e5dd7070Spatrick _mm_movedup_pd(__m128d __a)
248e5dd7070Spatrick {
249e5dd7070Spatrick return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
250e5dd7070Spatrick }
251e5dd7070Spatrick
252e5dd7070Spatrick /// Establishes a linear address memory range to be monitored and puts
253e5dd7070Spatrick /// the processor in the monitor event pending state. Data stored in the
254e5dd7070Spatrick /// monitored address range causes the processor to exit the pending state.
255e5dd7070Spatrick ///
256e5dd7070Spatrick /// \headerfile <x86intrin.h>
257e5dd7070Spatrick ///
258e5dd7070Spatrick /// This intrinsic corresponds to the <c> MONITOR </c> instruction.
259e5dd7070Spatrick ///
260e5dd7070Spatrick /// \param __p
261e5dd7070Spatrick /// The memory range to be monitored. The size of the range is determined by
262e5dd7070Spatrick /// CPUID function 0000_0005h.
263e5dd7070Spatrick /// \param __extensions
264e5dd7070Spatrick /// Optional extensions for the monitoring state.
265e5dd7070Spatrick /// \param __hints
266e5dd7070Spatrick /// Optional hints for the monitoring state.
267e5dd7070Spatrick static __inline__ void __DEFAULT_FN_ATTRS
_mm_monitor(void const * __p,unsigned __extensions,unsigned __hints)268e5dd7070Spatrick _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
269e5dd7070Spatrick {
270e5dd7070Spatrick __builtin_ia32_monitor(__p, __extensions, __hints);
271e5dd7070Spatrick }
272e5dd7070Spatrick
273e5dd7070Spatrick /// Used with the MONITOR instruction to wait while the processor is in
274e5dd7070Spatrick /// the monitor event pending state. Data stored in the monitored address
275e5dd7070Spatrick /// range causes the processor to exit the pending state.
276e5dd7070Spatrick ///
277e5dd7070Spatrick /// \headerfile <x86intrin.h>
278e5dd7070Spatrick ///
279e5dd7070Spatrick /// This intrinsic corresponds to the <c> MWAIT </c> instruction.
280e5dd7070Spatrick ///
281e5dd7070Spatrick /// \param __extensions
282e5dd7070Spatrick /// Optional extensions for the monitoring state, which may vary by
283e5dd7070Spatrick /// processor.
284e5dd7070Spatrick /// \param __hints
285e5dd7070Spatrick /// Optional hints for the monitoring state, which may vary by processor.
286e5dd7070Spatrick static __inline__ void __DEFAULT_FN_ATTRS
_mm_mwait(unsigned __extensions,unsigned __hints)287e5dd7070Spatrick _mm_mwait(unsigned __extensions, unsigned __hints)
288e5dd7070Spatrick {
289e5dd7070Spatrick __builtin_ia32_mwait(__extensions, __hints);
290e5dd7070Spatrick }
291e5dd7070Spatrick
292e5dd7070Spatrick #undef __DEFAULT_FN_ATTRS
293e5dd7070Spatrick
294e5dd7070Spatrick #endif /* __PMMINTRIN_H */
295