xref: /llvm-project/clang/lib/Headers/bmiintrin.h (revision 74cfd203c36d314e3a47c3e239776a1a90424531)
1 /*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
11 #error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
12 #endif
13 
14 #ifndef __BMIINTRIN_H
15 #define __BMIINTRIN_H
16 
/* The tzcnt intrinsics are deliberately usable on targets without BMI: the
   TZCNT instruction behaves as BSF there, and existing code expects to call
   these intrinsics as a potentially faster version of BSF. Their attribute set
   therefore names no target feature. constexpr is appended when the header is
   compiled as C++11 or later. */
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#else
#define __RELAXED_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__)) constexpr
#endif
26 
27 /// Counts the number of trailing zero bits in the operand.
28 ///
29 /// \headerfile <x86intrin.h>
30 ///
31 /// This intrinsic corresponds to the \c TZCNT instruction.
32 ///
33 /// \param __X
34 ///    An unsigned 16-bit integer whose trailing zeros are to be counted.
35 /// \returns An unsigned 16-bit integer containing the number of trailing zero
36 ///    bits in the operand.
37 /// \see _tzcnt_u16
38 static __inline__ unsigned short __RELAXED_FN_ATTRS
39 __tzcnt_u16(unsigned short __X) {
40   return __builtin_ia32_tzcnt_u16(__X);
41 }
42 
/// Returns the count of trailing zero bits in a 16-bit operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _tzcnt_u16(unsigned short __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction. The name is a
/// macro that expands to \c __tzcnt_u16.
///
/// \param __X
///    The unsigned 16-bit integer whose trailing zero bits are counted.
/// \returns The number of trailing zero bits in \a __X, as an unsigned 16-bit
///    integer.
/// \see __tzcnt_u16
#define _tzcnt_u16 __tzcnt_u16
59 
60 /// Counts the number of trailing zero bits in the operand.
61 ///
62 /// \headerfile <x86intrin.h>
63 ///
64 /// This intrinsic corresponds to the \c TZCNT instruction.
65 ///
66 /// \param __X
67 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
68 /// \returns An unsigned 32-bit integer containing the number of trailing zero
69 ///    bits in the operand.
70 /// \see { _mm_tzcnt_32 _tzcnt_u32 }
71 static __inline__ unsigned int __RELAXED_FN_ATTRS
72 __tzcnt_u32(unsigned int __X) {
73   return __builtin_ia32_tzcnt_u32(__X);
74 }
75 
76 /// Counts the number of trailing zero bits in the operand.
77 ///
78 /// \headerfile <x86intrin.h>
79 ///
80 /// This intrinsic corresponds to the \c TZCNT instruction.
81 ///
82 /// \param __X
83 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
84 /// \returns A 32-bit integer containing the number of trailing zero bits in
85 ///    the operand.
86 /// \see { __tzcnt_u32 _tzcnt_u32 }
87 static __inline__ int __RELAXED_FN_ATTRS
88 _mm_tzcnt_32(unsigned int __X) {
89   return (int)__builtin_ia32_tzcnt_u32(__X);
90 }
91 
/// Returns the count of trailing zero bits in a 32-bit operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _tzcnt_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction. The name is a
/// macro that expands to \c __tzcnt_u32.
///
/// \param __X
///    The unsigned 32-bit integer whose trailing zero bits are counted.
/// \returns The number of trailing zero bits in \a __X, as an unsigned 32-bit
///    integer.
/// \see { _mm_tzcnt_32 __tzcnt_u32 }
#define _tzcnt_u32 __tzcnt_u32
108 
109 #ifdef __x86_64__
110 
111 /// Counts the number of trailing zero bits in the operand.
112 ///
113 /// \headerfile <x86intrin.h>
114 ///
115 /// This intrinsic corresponds to the \c TZCNT instruction.
116 ///
117 /// \param __X
118 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
119 /// \returns An unsigned 64-bit integer containing the number of trailing zero
120 ///    bits in the operand.
121 /// \see { _mm_tzcnt_64 _tzcnt_u64 }
122 static __inline__ unsigned long long __RELAXED_FN_ATTRS
123 __tzcnt_u64(unsigned long long __X) {
124   return __builtin_ia32_tzcnt_u64(__X);
125 }
126 
127 /// Counts the number of trailing zero bits in the operand.
128 ///
129 /// \headerfile <x86intrin.h>
130 ///
131 /// This intrinsic corresponds to the \c TZCNT instruction.
132 ///
133 /// \param __X
134 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
135 /// \returns An 64-bit integer containing the number of trailing zero bits in
136 ///    the operand.
137 /// \see { __tzcnt_u64 _tzcnt_u64 }
138 static __inline__ long long __RELAXED_FN_ATTRS
139 _mm_tzcnt_64(unsigned long long __X) {
140   return (long long)__builtin_ia32_tzcnt_u64(__X);
141 }
142 
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _tzcnt_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction. The name is a
/// macro that expands to \c __tzcnt_u64.
///
/// \param __X
///    An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see { _mm_tzcnt_64 __tzcnt_u64 }
#define _tzcnt_u64 __tzcnt_u64
159 
160 #endif /* __x86_64__ */
161 
162 #undef __RELAXED_FN_ATTRS
163 
164 #if !defined(__SCE__) || __has_feature(modules) || defined(__BMI__)
165 
/* Attribute set applied to the BMI intrinsics defined below: always inlined,
   no debug info, and compiled with the "bmi" target feature. constexpr is
   appended when the header is compiled as C++11 or later. */
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
#else
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) constexpr
#endif
174 
175 /// Performs a bitwise AND of the second operand with the one's
176 ///    complement of the first operand.
177 ///
178 /// \headerfile <x86intrin.h>
179 ///
180 /// This intrinsic corresponds to the \c ANDN instruction.
181 ///
182 /// \param __X
183 ///    An unsigned integer containing one of the operands.
184 /// \param __Y
185 ///    An unsigned integer containing one of the operands.
186 /// \returns An unsigned integer containing the bitwise AND of the second
187 ///    operand with the one's complement of the first operand.
188 /// \see _andn_u32
189 static __inline__ unsigned int __DEFAULT_FN_ATTRS
190 __andn_u32(unsigned int __X, unsigned int __Y) {
191   return ~__X & __Y;
192 }
193 
/// Computes the bitwise AND of the second operand with the one's complement
///    of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction. The name is a macro
/// that expands to \c __andn_u32.
///
/// \param __X
///    An unsigned integer; this operand is complemented before the AND.
/// \param __Y
///    An unsigned integer; this operand is used unmodified.
/// \returns An unsigned integer containing the bitwise AND of the second
///    operand with the one's complement of the first operand.
/// \see __andn_u32
#define _andn_u32 __andn_u32
213 
214 /* AMD-specified, double-leading-underscore version of BEXTR */
215 /// Extracts the specified bits from the first operand and returns them
216 ///    in the least significant bits of the result.
217 ///
218 /// \headerfile <x86intrin.h>
219 ///
220 /// This intrinsic corresponds to the \c BEXTR instruction.
221 ///
222 /// \param __X
223 ///    An unsigned integer whose bits are to be extracted.
224 /// \param __Y
225 ///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
226 ///    specify the index of the least significant bit. Bits [15:8] specify the
227 ///    number of bits to be extracted.
228 /// \returns An unsigned integer whose least significant bits contain the
229 ///    extracted bits.
230 /// \see _bextr_u32
231 static __inline__ unsigned int __DEFAULT_FN_ATTRS
232 __bextr_u32(unsigned int __X, unsigned int __Y) {
233   return __builtin_ia32_bextr_u32(__X, __Y);
234 }
235 
236 /* Intel-specified, single-leading-underscore version of BEXTR */
237 /// Extracts the specified bits from the first operand and returns them
238 ///    in the least significant bits of the result.
239 ///
240 /// \headerfile <x86intrin.h>
241 ///
242 /// This intrinsic corresponds to the \c BEXTR instruction.
243 ///
244 /// \param __X
245 ///    An unsigned integer whose bits are to be extracted.
246 /// \param __Y
247 ///    An unsigned integer used to specify the index of the least significant
248 ///    bit for the bits to be extracted. Bits [7:0] specify the index.
249 /// \param __Z
250 ///    An unsigned integer used to specify the number of bits to be extracted.
251 ///    Bits [7:0] specify the number of bits.
252 /// \returns An unsigned integer whose least significant bits contain the
253 ///    extracted bits.
254 /// \see __bextr_u32
255 static __inline__ unsigned int __DEFAULT_FN_ATTRS
256 _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) {
257   return __builtin_ia32_bextr_u32(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
258 }
259 
260 /* Intel-specified, single-leading-underscore version of BEXTR2 */
261 /// Extracts the specified bits from the first operand and returns them
262 ///    in the least significant bits of the result.
263 ///
264 /// \headerfile <x86intrin.h>
265 ///
266 /// This intrinsic corresponds to the \c BEXTR instruction.
267 ///
268 /// \param __X
269 ///    An unsigned integer whose bits are to be extracted.
270 /// \param __Y
271 ///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
272 ///    specify the index of the least significant bit. Bits [15:8] specify the
273 ///    number of bits to be extracted.
274 /// \returns An unsigned integer whose least significant bits contain the
275 ///    extracted bits.
276 /// \see __bextr_u32
277 static __inline__ unsigned int __DEFAULT_FN_ATTRS
278 _bextr2_u32(unsigned int __X, unsigned int __Y) {
279   return __builtin_ia32_bextr_u32(__X, __Y);
280 }
281 
282 /// Clears all bits in the source except for the least significant bit
283 ///    containing a value of 1 and returns the result.
284 ///
285 /// \headerfile <x86intrin.h>
286 ///
287 /// This intrinsic corresponds to the \c BLSI instruction.
288 ///
289 /// \param __X
290 ///    An unsigned integer whose bits are to be cleared.
291 /// \returns An unsigned integer containing the result of clearing the bits from
292 ///    the source operand.
293 /// \see _blsi_u32
294 static __inline__ unsigned int __DEFAULT_FN_ATTRS
295 __blsi_u32(unsigned int __X) {
296   return __X & -__X;
297 }
298 
/// Isolates the lowest set bit of the operand: every bit is cleared except
///    the least significant bit that holds a 1.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsi_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction. The name is a macro
/// that expands to \c __blsi_u32.
///
/// \param __X
///    The unsigned integer whose bits are to be cleared.
/// \returns An unsigned integer containing the result of clearing the bits
///    from the source operand.
/// \see __blsi_u32
#define _blsi_u32 __blsi_u32
316 
317 /// Creates a mask whose bits are set to 1, using bit 0 up to and
318 ///    including the least significant bit that is set to 1 in the source
319 ///    operand and returns the result.
320 ///
321 /// \headerfile <x86intrin.h>
322 ///
323 /// This intrinsic corresponds to the \c BLSMSK instruction.
324 ///
325 /// \param __X
326 ///    An unsigned integer used to create the mask.
327 /// \returns An unsigned integer containing the newly created mask.
328 /// \see _blsmsk_u32
329 static __inline__ unsigned int __DEFAULT_FN_ATTRS
330 __blsmsk_u32(unsigned int __X) {
331   return __X ^ (__X - 1);
332 }
333 
/// Builds a mask with bits set from bit 0 up to and including the least
///    significant bit that is set to 1 in the source operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsmsk_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction. The name is a
/// macro that expands to \c __blsmsk_u32.
///
/// \param __X
///    The unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
/// \see __blsmsk_u32
#define _blsmsk_u32 __blsmsk_u32
351 
352 /// Clears the least significant bit that is set to 1 in the source
353 ///    operand and returns the result.
354 ///
355 /// \headerfile <x86intrin.h>
356 ///
357 /// This intrinsic corresponds to the \c BLSR instruction.
358 ///
359 /// \param __X
360 ///    An unsigned integer containing the operand to be cleared.
361 /// \returns An unsigned integer containing the result of clearing the source
362 ///    operand.
363 /// \see _blsr_u32
364 static __inline__ unsigned int __DEFAULT_FN_ATTRS
365 __blsr_u32(unsigned int __X) {
366   return __X & (__X - 1);
367 }
368 
/// Clears the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsr_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction. The name is a macro
/// that expands to \c __blsr_u32.
///
/// \param __X
///    An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
///    operand.
/// \see __blsr_u32
#define _blsr_u32 __blsr_u32
386 
387 #ifdef __x86_64__
388 
389 /// Performs a bitwise AND of the second operand with the one's
390 ///    complement of the first operand.
391 ///
392 /// \headerfile <x86intrin.h>
393 ///
394 /// This intrinsic corresponds to the \c ANDN instruction.
395 ///
396 /// \param __X
397 ///    An unsigned 64-bit integer containing one of the operands.
398 /// \param __Y
399 ///    An unsigned 64-bit integer containing one of the operands.
400 /// \returns An unsigned 64-bit integer containing the bitwise AND of the second
401 ///    operand with the one's complement of the first operand.
402 /// \see _andn_u64
403 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
404 __andn_u64 (unsigned long long __X, unsigned long long __Y) {
405   return ~__X & __Y;
406 }
407 
/// Computes the bitwise AND of the second operand with the one's complement
///    of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _andn_u64(unsigned long long __X,
///                              unsigned long long __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction. The name is a macro
/// that expands to \c __andn_u64.
///
/// \param __X
///    An unsigned 64-bit integer; this operand is complemented before the
///    AND.
/// \param __Y
///    An unsigned 64-bit integer; this operand is used unmodified.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the
///    second operand with the one's complement of the first operand.
/// \see __andn_u64
#define _andn_u64 __andn_u64
428 
429 /* AMD-specified, double-leading-underscore version of BEXTR */
430 /// Extracts the specified bits from the first operand and returns them
431 ///    in the least significant bits of the result.
432 ///
433 /// \headerfile <x86intrin.h>
434 ///
435 /// This intrinsic corresponds to the \c BEXTR instruction.
436 ///
437 /// \param __X
438 ///    An unsigned 64-bit integer whose bits are to be extracted.
439 /// \param __Y
440 ///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
441 ///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
442 ///    the number of bits to be extracted.
443 /// \returns An unsigned 64-bit integer whose least significant bits contain the
444 ///    extracted bits.
445 /// \see _bextr_u64
446 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
447 __bextr_u64(unsigned long long __X, unsigned long long __Y) {
448   return __builtin_ia32_bextr_u64(__X, __Y);
449 }
450 
451 /* Intel-specified, single-leading-underscore version of BEXTR */
452 /// Extracts the specified bits from the first operand and returns them
453 ///     in the least significant bits of the result.
454 ///
455 /// \headerfile <x86intrin.h>
456 ///
457 /// This intrinsic corresponds to the \c BEXTR instruction.
458 ///
459 /// \param __X
460 ///    An unsigned 64-bit integer whose bits are to be extracted.
461 /// \param __Y
462 ///    An unsigned integer used to specify the index of the least significant
463 ///    bit for the bits to be extracted. Bits [7:0] specify the index.
464 /// \param __Z
465 ///    An unsigned integer used to specify the number of bits to be extracted.
466 ///    Bits [7:0] specify the number of bits.
467 /// \returns An unsigned 64-bit integer whose least significant bits contain the
468 ///    extracted bits.
469 /// \see __bextr_u64
470 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
471 _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) {
472   return __builtin_ia32_bextr_u64(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
473 }
474 
475 /* Intel-specified, single-leading-underscore version of BEXTR2 */
476 /// Extracts the specified bits from the first operand and returns them
477 ///    in the least significant bits of the result.
478 ///
479 /// \headerfile <x86intrin.h>
480 ///
481 /// This intrinsic corresponds to the \c BEXTR instruction.
482 ///
483 /// \param __X
484 ///    An unsigned 64-bit integer whose bits are to be extracted.
485 /// \param __Y
486 ///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
487 ///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
488 ///    the number of bits to be extracted.
489 /// \returns An unsigned 64-bit integer whose least significant bits contain the
490 ///    extracted bits.
491 /// \see __bextr_u64
492 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
493 _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
494   return __builtin_ia32_bextr_u64(__X, __Y);
495 }
496 
497 /// Clears all bits in the source except for the least significant bit
498 ///    containing a value of 1 and returns the result.
499 ///
500 /// \headerfile <x86intrin.h>
501 ///
502 /// This intrinsic corresponds to the \c BLSI instruction.
503 ///
504 /// \param __X
505 ///    An unsigned 64-bit integer whose bits are to be cleared.
506 /// \returns An unsigned 64-bit integer containing the result of clearing the
507 ///    bits from the source operand.
508 /// \see _blsi_u64
509 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
510 __blsi_u64(unsigned long long __X) {
511   return __X & -__X;
512 }
513 
/// Isolates the lowest set bit of the operand: every bit is cleared except
///    the least significant bit that holds a 1.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsi_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction. The name is a macro
/// that expands to \c __blsi_u64.
///
/// \param __X
///    The unsigned 64-bit integer whose bits are to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
///    bits from the source operand.
/// \see __blsi_u64
#define _blsi_u64 __blsi_u64
531 
532 /// Creates a mask whose bits are set to 1, using bit 0 up to and
533 ///    including the least significant bit that is set to 1 in the source
534 ///    operand and returns the result.
535 ///
536 /// \headerfile <x86intrin.h>
537 ///
538 /// This intrinsic corresponds to the \c BLSMSK instruction.
539 ///
540 /// \param __X
541 ///    An unsigned 64-bit integer used to create the mask.
542 /// \returns An unsigned 64-bit integer containing the newly created mask.
543 /// \see _blsmsk_u64
544 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
545 __blsmsk_u64(unsigned long long __X) {
546   return __X ^ (__X - 1);
547 }
548 
/// Builds a mask with bits set from bit 0 up to and including the least
///    significant bit that is set to 1 in the source operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsmsk_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction. The name is a
/// macro that expands to \c __blsmsk_u64.
///
/// \param __X
///    The unsigned 64-bit integer used to create the mask.
/// \returns An unsigned 64-bit integer containing the newly created mask.
/// \see __blsmsk_u64
#define _blsmsk_u64 __blsmsk_u64
566 
567 /// Clears the least significant bit that is set to 1 in the source
568 ///    operand and returns the result.
569 ///
570 /// \headerfile <x86intrin.h>
571 ///
572 /// This intrinsic corresponds to the \c BLSR instruction.
573 ///
574 /// \param __X
575 ///    An unsigned 64-bit integer containing the operand to be cleared.
576 /// \returns An unsigned 64-bit integer containing the result of clearing the
577 ///    source operand.
578 /// \see _blsr_u64
579 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
580 __blsr_u64(unsigned long long __X) {
581   return __X & (__X - 1);
582 }
583 
/// Resets the least significant bit that is set to 1 in the source operand
///    and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsr_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction. The name is a macro
/// that expands to \c __blsr_u64.
///
/// \param __X
///    The unsigned 64-bit integer whose lowest set bit is to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
///    source operand.
/// \see __blsr_u64
#define _blsr_u64 __blsr_u64
601 
602 #endif /* __x86_64__ */
603 
604 #undef __DEFAULT_FN_ATTRS
605 
606 #endif /* !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) */
607 
608 #endif /* __BMIINTRIN_H */
609