/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

10*06c3fb27SDimitry Andric #ifndef __IMMINTRIN_H
11*06c3fb27SDimitry Andric #error "Never use <bmi2intrin.h> directly; include <immintrin.h> instead."
120b57cec5SDimitry Andric #endif
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric #ifndef __BMI2INTRIN_H
150b57cec5SDimitry Andric #define __BMI2INTRIN_H
160b57cec5SDimitry Andric
/* Define the default attributes for the functions in this file: every
 * intrinsic is marked always_inline and nodebug, and is compiled with the
 * "bmi2" target feature enabled. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))

20*06c3fb27SDimitry Andric /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
21*06c3fb27SDimitry Andric /// starting at bit number \a __Y.
22*06c3fb27SDimitry Andric ///
23*06c3fb27SDimitry Andric /// \code{.operation}
24*06c3fb27SDimitry Andric /// i := __Y[7:0]
25*06c3fb27SDimitry Andric /// result := __X
26*06c3fb27SDimitry Andric /// IF i < 32
27*06c3fb27SDimitry Andric /// result[31:i] := 0
28*06c3fb27SDimitry Andric /// FI
29*06c3fb27SDimitry Andric /// \endcode
30*06c3fb27SDimitry Andric ///
31*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
32*06c3fb27SDimitry Andric ///
33*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c BZHI instruction.
34*06c3fb27SDimitry Andric ///
35*06c3fb27SDimitry Andric /// \param __X
36*06c3fb27SDimitry Andric /// The 32-bit source value to copy.
37*06c3fb27SDimitry Andric /// \param __Y
38*06c3fb27SDimitry Andric /// The lower 8 bits specify the bit number of the lowest bit to zero.
39*06c3fb27SDimitry Andric /// \returns The partially zeroed 32-bit value.
400b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS
_bzhi_u32(unsigned int __X,unsigned int __Y)410b57cec5SDimitry Andric _bzhi_u32(unsigned int __X, unsigned int __Y)
420b57cec5SDimitry Andric {
430b57cec5SDimitry Andric return __builtin_ia32_bzhi_si(__X, __Y);
440b57cec5SDimitry Andric }
450b57cec5SDimitry Andric
46*06c3fb27SDimitry Andric /// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X
47*06c3fb27SDimitry Andric /// into the 32-bit result, according to the mask in the unsigned 32-bit
48*06c3fb27SDimitry Andric /// integer \a __Y. All other bits of the result are zero.
49*06c3fb27SDimitry Andric ///
50*06c3fb27SDimitry Andric /// \code{.operation}
51*06c3fb27SDimitry Andric /// i := 0
52*06c3fb27SDimitry Andric /// result := 0
53*06c3fb27SDimitry Andric /// FOR m := 0 TO 31
54*06c3fb27SDimitry Andric /// IF __Y[m] == 1
55*06c3fb27SDimitry Andric /// result[m] := __X[i]
56*06c3fb27SDimitry Andric /// i := i + 1
57*06c3fb27SDimitry Andric /// ENDIF
58*06c3fb27SDimitry Andric /// ENDFOR
59*06c3fb27SDimitry Andric /// \endcode
60*06c3fb27SDimitry Andric ///
61*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
62*06c3fb27SDimitry Andric ///
63*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c PDEP instruction.
64*06c3fb27SDimitry Andric ///
65*06c3fb27SDimitry Andric /// \param __X
66*06c3fb27SDimitry Andric /// The 32-bit source value to copy.
67*06c3fb27SDimitry Andric /// \param __Y
68*06c3fb27SDimitry Andric /// The 32-bit mask specifying where to deposit source bits.
69*06c3fb27SDimitry Andric /// \returns The 32-bit result.
700b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS
_pdep_u32(unsigned int __X,unsigned int __Y)710b57cec5SDimitry Andric _pdep_u32(unsigned int __X, unsigned int __Y)
720b57cec5SDimitry Andric {
730b57cec5SDimitry Andric return __builtin_ia32_pdep_si(__X, __Y);
740b57cec5SDimitry Andric }
750b57cec5SDimitry Andric
76*06c3fb27SDimitry Andric /// Extract (gather) bits from the unsigned 32-bit integer \a __X into the
77*06c3fb27SDimitry Andric /// low-order bits of the 32-bit result, according to the mask in the
78*06c3fb27SDimitry Andric /// unsigned 32-bit integer \a __Y. All other bits of the result are zero.
79*06c3fb27SDimitry Andric ///
80*06c3fb27SDimitry Andric /// \code{.operation}
81*06c3fb27SDimitry Andric /// i := 0
82*06c3fb27SDimitry Andric /// result := 0
83*06c3fb27SDimitry Andric /// FOR m := 0 TO 31
84*06c3fb27SDimitry Andric /// IF __Y[m] == 1
85*06c3fb27SDimitry Andric /// result[i] := __X[m]
86*06c3fb27SDimitry Andric /// i := i + 1
87*06c3fb27SDimitry Andric /// ENDIF
88*06c3fb27SDimitry Andric /// ENDFOR
89*06c3fb27SDimitry Andric /// \endcode
90*06c3fb27SDimitry Andric ///
91*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
92*06c3fb27SDimitry Andric ///
93*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c PEXT instruction.
94*06c3fb27SDimitry Andric ///
95*06c3fb27SDimitry Andric /// \param __X
96*06c3fb27SDimitry Andric /// The 32-bit source value to copy.
97*06c3fb27SDimitry Andric /// \param __Y
98*06c3fb27SDimitry Andric /// The 32-bit mask specifying which source bits to extract.
99*06c3fb27SDimitry Andric /// \returns The 32-bit result.
1000b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS
_pext_u32(unsigned int __X,unsigned int __Y)1010b57cec5SDimitry Andric _pext_u32(unsigned int __X, unsigned int __Y)
1020b57cec5SDimitry Andric {
1030b57cec5SDimitry Andric return __builtin_ia32_pext_si(__X, __Y);
1040b57cec5SDimitry Andric }
1050b57cec5SDimitry Andric
106*06c3fb27SDimitry Andric /// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a
107*06c3fb27SDimitry Andric /// 64-bit product. Stores the upper 32 bits of the product in the
108*06c3fb27SDimitry Andric /// memory at \a __P and returns the lower 32 bits.
109*06c3fb27SDimitry Andric ///
110*06c3fb27SDimitry Andric /// \code{.operation}
111*06c3fb27SDimitry Andric /// Store32(__P, (__X * __Y)[63:32])
112*06c3fb27SDimitry Andric /// result := (__X * __Y)[31:0]
113*06c3fb27SDimitry Andric /// \endcode
114*06c3fb27SDimitry Andric ///
115*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
116*06c3fb27SDimitry Andric ///
117*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c MULX instruction.
118*06c3fb27SDimitry Andric ///
119*06c3fb27SDimitry Andric /// \param __X
120*06c3fb27SDimitry Andric /// An unsigned 32-bit multiplicand.
121*06c3fb27SDimitry Andric /// \param __Y
122*06c3fb27SDimitry Andric /// An unsigned 32-bit multiplicand.
123*06c3fb27SDimitry Andric /// \param __P
124*06c3fb27SDimitry Andric /// A pointer to memory for storing the upper half of the product.
125*06c3fb27SDimitry Andric /// \returns The lower half of the product.
126*06c3fb27SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mulx_u32(unsigned int __X,unsigned int __Y,unsigned int * __P)127*06c3fb27SDimitry Andric _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
128*06c3fb27SDimitry Andric {
129*06c3fb27SDimitry Andric unsigned long long __res = (unsigned long long) __X * __Y;
130*06c3fb27SDimitry Andric *__P = (unsigned int)(__res >> 32);
131*06c3fb27SDimitry Andric return (unsigned int)__res;
132*06c3fb27SDimitry Andric }
133*06c3fb27SDimitry Andric
1340b57cec5SDimitry Andric #ifdef __x86_64__
1350b57cec5SDimitry Andric
136*06c3fb27SDimitry Andric /// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits
137*06c3fb27SDimitry Andric /// starting at bit number \a __Y.
138*06c3fb27SDimitry Andric ///
139*06c3fb27SDimitry Andric /// \code{.operation}
140*06c3fb27SDimitry Andric /// i := __Y[7:0]
141*06c3fb27SDimitry Andric /// result := __X
142*06c3fb27SDimitry Andric /// IF i < 64
143*06c3fb27SDimitry Andric /// result[63:i] := 0
144*06c3fb27SDimitry Andric /// FI
145*06c3fb27SDimitry Andric /// \endcode
146*06c3fb27SDimitry Andric ///
147*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
148*06c3fb27SDimitry Andric ///
149*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c BZHI instruction.
150*06c3fb27SDimitry Andric ///
151*06c3fb27SDimitry Andric /// \param __X
152*06c3fb27SDimitry Andric /// The 64-bit source value to copy.
153*06c3fb27SDimitry Andric /// \param __Y
154*06c3fb27SDimitry Andric /// The lower 8 bits specify the bit number of the lowest bit to zero.
155*06c3fb27SDimitry Andric /// \returns The partially zeroed 64-bit value.
1560b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_bzhi_u64(unsigned long long __X,unsigned long long __Y)1570b57cec5SDimitry Andric _bzhi_u64(unsigned long long __X, unsigned long long __Y)
1580b57cec5SDimitry Andric {
1590b57cec5SDimitry Andric return __builtin_ia32_bzhi_di(__X, __Y);
1600b57cec5SDimitry Andric }
1610b57cec5SDimitry Andric
162*06c3fb27SDimitry Andric /// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X
163*06c3fb27SDimitry Andric /// into the 64-bit result, according to the mask in the unsigned 64-bit
164*06c3fb27SDimitry Andric /// integer \a __Y. All other bits of the result are zero.
165*06c3fb27SDimitry Andric ///
166*06c3fb27SDimitry Andric /// \code{.operation}
167*06c3fb27SDimitry Andric /// i := 0
168*06c3fb27SDimitry Andric /// result := 0
169*06c3fb27SDimitry Andric /// FOR m := 0 TO 63
170*06c3fb27SDimitry Andric /// IF __Y[m] == 1
171*06c3fb27SDimitry Andric /// result[m] := __X[i]
172*06c3fb27SDimitry Andric /// i := i + 1
173*06c3fb27SDimitry Andric /// ENDIF
174*06c3fb27SDimitry Andric /// ENDFOR
175*06c3fb27SDimitry Andric /// \endcode
176*06c3fb27SDimitry Andric ///
177*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
178*06c3fb27SDimitry Andric ///
179*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c PDEP instruction.
180*06c3fb27SDimitry Andric ///
181*06c3fb27SDimitry Andric /// \param __X
182*06c3fb27SDimitry Andric /// The 64-bit source value to copy.
183*06c3fb27SDimitry Andric /// \param __Y
184*06c3fb27SDimitry Andric /// The 64-bit mask specifying where to deposit source bits.
185*06c3fb27SDimitry Andric /// \returns The 64-bit result.
1860b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_pdep_u64(unsigned long long __X,unsigned long long __Y)1870b57cec5SDimitry Andric _pdep_u64(unsigned long long __X, unsigned long long __Y)
1880b57cec5SDimitry Andric {
1890b57cec5SDimitry Andric return __builtin_ia32_pdep_di(__X, __Y);
1900b57cec5SDimitry Andric }
1910b57cec5SDimitry Andric
192*06c3fb27SDimitry Andric /// Extract (gather) bits from the unsigned 64-bit integer \a __X into the
193*06c3fb27SDimitry Andric /// low-order bits of the 64-bit result, according to the mask in the
194*06c3fb27SDimitry Andric /// unsigned 64-bit integer \a __Y. All other bits of the result are zero.
195*06c3fb27SDimitry Andric ///
196*06c3fb27SDimitry Andric /// \code{.operation}
197*06c3fb27SDimitry Andric /// i := 0
198*06c3fb27SDimitry Andric /// result := 0
199*06c3fb27SDimitry Andric /// FOR m := 0 TO 63
200*06c3fb27SDimitry Andric /// IF __Y[m] == 1
201*06c3fb27SDimitry Andric /// result[i] := __X[m]
202*06c3fb27SDimitry Andric /// i := i + 1
203*06c3fb27SDimitry Andric /// ENDIF
204*06c3fb27SDimitry Andric /// ENDFOR
205*06c3fb27SDimitry Andric /// \endcode
206*06c3fb27SDimitry Andric ///
207*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
208*06c3fb27SDimitry Andric ///
209*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c PEXT instruction.
210*06c3fb27SDimitry Andric ///
211*06c3fb27SDimitry Andric /// \param __X
212*06c3fb27SDimitry Andric /// The 64-bit source value to copy.
213*06c3fb27SDimitry Andric /// \param __Y
214*06c3fb27SDimitry Andric /// The 64-bit mask specifying which source bits to extract.
215*06c3fb27SDimitry Andric /// \returns The 64-bit result.
2160b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_pext_u64(unsigned long long __X,unsigned long long __Y)2170b57cec5SDimitry Andric _pext_u64(unsigned long long __X, unsigned long long __Y)
2180b57cec5SDimitry Andric {
2190b57cec5SDimitry Andric return __builtin_ia32_pext_di(__X, __Y);
2200b57cec5SDimitry Andric }
2210b57cec5SDimitry Andric
222*06c3fb27SDimitry Andric /// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a
223*06c3fb27SDimitry Andric /// 128-bit product. Stores the upper 64 bits of the product to the
224*06c3fb27SDimitry Andric /// memory addressed by \a __P and returns the lower 64 bits.
225*06c3fb27SDimitry Andric ///
226*06c3fb27SDimitry Andric /// \code{.operation}
227*06c3fb27SDimitry Andric /// Store64(__P, (__X * __Y)[127:64])
228*06c3fb27SDimitry Andric /// result := (__X * __Y)[63:0]
229*06c3fb27SDimitry Andric /// \endcode
230*06c3fb27SDimitry Andric ///
231*06c3fb27SDimitry Andric /// \headerfile <immintrin.h>
232*06c3fb27SDimitry Andric ///
233*06c3fb27SDimitry Andric /// This intrinsic corresponds to the \c MULX instruction.
234*06c3fb27SDimitry Andric ///
235*06c3fb27SDimitry Andric /// \param __X
236*06c3fb27SDimitry Andric /// An unsigned 64-bit multiplicand.
237*06c3fb27SDimitry Andric /// \param __Y
238*06c3fb27SDimitry Andric /// An unsigned 64-bit multiplicand.
239*06c3fb27SDimitry Andric /// \param __P
240*06c3fb27SDimitry Andric /// A pointer to memory for storing the upper half of the product.
241*06c3fb27SDimitry Andric /// \returns The lower half of the product.
2420b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mulx_u64(unsigned long long __X,unsigned long long __Y,unsigned long long * __P)2430b57cec5SDimitry Andric _mulx_u64 (unsigned long long __X, unsigned long long __Y,
2440b57cec5SDimitry Andric unsigned long long *__P)
2450b57cec5SDimitry Andric {
2460b57cec5SDimitry Andric unsigned __int128 __res = (unsigned __int128) __X * __Y;
2470b57cec5SDimitry Andric *__P = (unsigned long long) (__res >> 64);
2480b57cec5SDimitry Andric return (unsigned long long) __res;
2490b57cec5SDimitry Andric }
2500b57cec5SDimitry Andric
251*06c3fb27SDimitry Andric #endif /* __x86_64__ */
2520b57cec5SDimitry Andric
2530b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS
2540b57cec5SDimitry Andric
2550b57cec5SDimitry Andric #endif /* __BMI2INTRIN_H */
256