xref: /netbsd-src/external/apache2/llvm/dist/clang/lib/Headers/ammintrin.h (revision 7330f729ccf0bd976a06f95fad452fe774fc7fd1)
/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9*7330f729Sjoerg 
10*7330f729Sjoerg #ifndef __AMMINTRIN_H
11*7330f729Sjoerg #define __AMMINTRIN_H
12*7330f729Sjoerg 
13*7330f729Sjoerg #include <pmmintrin.h>
14*7330f729Sjoerg 
/* Default attributes applied to every intrinsic function defined in this
 * file: always inline, no debug info, SSE4a target feature enabled, and a
 * minimum vector width of 128 bits. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), \
                 __min_vector_width__(128)))
17*7330f729Sjoerg 
/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len), (char)(idx)))
45*7330f729Sjoerg 
46*7330f729Sjoerg /// Extracts the specified bits from the lower 64 bits of the 128-bit
47*7330f729Sjoerg ///    integer vector operand at the index and of the length specified by
48*7330f729Sjoerg ///    \a __y.
49*7330f729Sjoerg ///
50*7330f729Sjoerg /// \headerfile <x86intrin.h>
51*7330f729Sjoerg ///
52*7330f729Sjoerg /// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
53*7330f729Sjoerg ///
54*7330f729Sjoerg /// \param __x
55*7330f729Sjoerg ///    The value from which bits are extracted.
56*7330f729Sjoerg /// \param __y
57*7330f729Sjoerg ///    Specifies the index of the least significant bit at [13:8] and the
58*7330f729Sjoerg ///    length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
59*7330f729Sjoerg ///    length is interpreted as 64. If the sum of the index and length is
60*7330f729Sjoerg ///    greater than 64, the result is undefined. If the length and index are
61*7330f729Sjoerg ///    both zero, bits [63:0] of parameter \a __x are extracted. If the length
62*7330f729Sjoerg ///    is zero but the index is non-zero, the result is undefined.
63*7330f729Sjoerg /// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
64*7330f729Sjoerg ///    from the source operand.
65*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x,__m128i __y)66*7330f729Sjoerg _mm_extract_si64(__m128i __x, __m128i __y)
67*7330f729Sjoerg {
68*7330f729Sjoerg   return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
69*7330f729Sjoerg }
70*7330f729Sjoerg 
/// Inserts bits of a specified length from the source integer vector
///    \a y into the lower 64 bits of the destination integer vector \a x at
///    the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
///                          const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64 bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                    (__v2di)(__m128i)(y), \
                                    (char)(len), (char)(idx)))
108*7330f729Sjoerg 
109*7330f729Sjoerg /// Inserts bits of a specified length from the source integer vector
110*7330f729Sjoerg ///    \a __y into the lower 64 bits of the destination integer vector \a __x
111*7330f729Sjoerg ///    at the index and of the length specified by \a __y.
112*7330f729Sjoerg ///
113*7330f729Sjoerg /// \headerfile <x86intrin.h>
114*7330f729Sjoerg ///
115*7330f729Sjoerg /// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
116*7330f729Sjoerg ///
117*7330f729Sjoerg /// \param __x
118*7330f729Sjoerg ///    The destination operand where bits will be inserted. The inserted bits
119*7330f729Sjoerg ///    are defined by the length and by the index of the least significant bit
120*7330f729Sjoerg ///    specified by operand \a __y.
121*7330f729Sjoerg /// \param __y
122*7330f729Sjoerg ///    The source operand containing the bits to be extracted. The extracted
123*7330f729Sjoerg ///    bits are the least significant bits of operand \a __y with length
124*7330f729Sjoerg ///    specified by bits [69:64]. These are inserted into the destination at the
125*7330f729Sjoerg ///    index specified by bits [77:72]; all other bits are ignored. If bits
126*7330f729Sjoerg ///    [69:64] are zero, the length is interpreted as 64. If the sum of the
127*7330f729Sjoerg ///    index and length is greater than 64, the result is undefined. If the
128*7330f729Sjoerg ///    length and index are both zero, bits [63:0] of parameter \a __y are
129*7330f729Sjoerg ///    inserted into parameter \a __x. If the length is zero but the index is
130*7330f729Sjoerg ///    non-zero, the result is undefined.
131*7330f729Sjoerg /// \returns A 128-bit integer vector containing the original lower 64-bits of
132*7330f729Sjoerg ///    destination operand \a __x with the specified bitfields replaced by the
133*7330f729Sjoerg ///    lower bits of source operand \a __y. The upper 64 bits of the return
134*7330f729Sjoerg ///    value are undefined.
135*7330f729Sjoerg static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x,__m128i __y)136*7330f729Sjoerg _mm_insert_si64(__m128i __x, __m128i __y)
137*7330f729Sjoerg {
138*7330f729Sjoerg   return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
139*7330f729Sjoerg }
140*7330f729Sjoerg 
141*7330f729Sjoerg /// Stores a 64-bit double-precision value in a 64-bit memory location.
142*7330f729Sjoerg ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
143*7330f729Sjoerg ///    used again soon).
144*7330f729Sjoerg ///
145*7330f729Sjoerg /// \headerfile <x86intrin.h>
146*7330f729Sjoerg ///
147*7330f729Sjoerg /// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
148*7330f729Sjoerg ///
149*7330f729Sjoerg /// \param __p
150*7330f729Sjoerg ///    The 64-bit memory location used to store the register value.
151*7330f729Sjoerg /// \param __a
152*7330f729Sjoerg ///    The 64-bit double-precision floating-point register value to be stored.
153*7330f729Sjoerg static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(double * __p,__m128d __a)154*7330f729Sjoerg _mm_stream_sd(double *__p, __m128d __a)
155*7330f729Sjoerg {
156*7330f729Sjoerg   __builtin_ia32_movntsd(__p, (__v2df)__a);
157*7330f729Sjoerg }
158*7330f729Sjoerg 
159*7330f729Sjoerg /// Stores a 32-bit single-precision floating-point value in a 32-bit
160*7330f729Sjoerg ///    memory location. To minimize caching, the data is flagged as
161*7330f729Sjoerg ///    non-temporal (unlikely to be used again soon).
162*7330f729Sjoerg ///
163*7330f729Sjoerg /// \headerfile <x86intrin.h>
164*7330f729Sjoerg ///
165*7330f729Sjoerg /// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
166*7330f729Sjoerg ///
167*7330f729Sjoerg /// \param __p
168*7330f729Sjoerg ///    The 32-bit memory location used to store the register value.
169*7330f729Sjoerg /// \param __a
170*7330f729Sjoerg ///    The 32-bit single-precision floating-point register value to be stored.
171*7330f729Sjoerg static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(float * __p,__m128 __a)172*7330f729Sjoerg _mm_stream_ss(float *__p, __m128 __a)
173*7330f729Sjoerg {
174*7330f729Sjoerg   __builtin_ia32_movntss(__p, (__v4sf)__a);
175*7330f729Sjoerg }
176*7330f729Sjoerg 
/* Keep the helper attribute macro private to this header. */
#undef __DEFAULT_FN_ATTRS
178*7330f729Sjoerg 
179*7330f729Sjoerg #endif /* __AMMINTRIN_H */
180