/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
  9
/* Refuse direct inclusion: the build fails unless __X86INTRIN_H or
   __IMMINTRIN_H is already defined (presumably by <x86intrin.h> or
   <immintrin.h>, the umbrella headers those guards are named after). */
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
#endif
 13
 14#ifndef __BMIINTRIN_H
 15#define __BMIINTRIN_H
 16
/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
   instruction behaves as BSF on non-BMI targets, there is code that expects
   to use it as a potentially faster version of BSF.

   For that reason these attributes carry no __target__("bmi") requirement.
   When compiling as C++11 or later, the intrinsics are additionally declared
   constexpr. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __RELAXED_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__)) constexpr
#else
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#endif
 26
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// Declared with \c __RELAXED_FN_ATTRS, so it carries no "bmi" target
/// requirement and may be used when BMI is not enabled.
///
/// \param __X
///    An unsigned 16-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see _tzcnt_u16
static __inline__ unsigned short __RELAXED_FN_ATTRS
__tzcnt_u16(unsigned short __X) {
  /* Forwards directly to the compiler builtin. */
  return __builtin_ia32_tzcnt_u16(__X);
}
 42
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _tzcnt_u16(unsigned short __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// Implemented as a macro that expands to \c __tzcnt_u16; the prototype above
/// only illustrates how it is called.
///
/// \param __X
///    An unsigned 16-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see __tzcnt_u16
#define _tzcnt_u16 __tzcnt_u16
 59
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// Declared with \c __RELAXED_FN_ATTRS, so it carries no "bmi" target
/// requirement and may be used when BMI is not enabled.
///
/// \param __X
///    An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see { _mm_tzcnt_32 _tzcnt_u32 }
static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X) {
  /* Forwards directly to the compiler builtin. */
  return __builtin_ia32_tzcnt_u32(__X);
}
 75
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// Same builtin as \c __tzcnt_u32, but the count is returned as a signed
/// \c int.
///
/// \param __X
///    An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns A 32-bit integer containing the number of trailing zero bits in
///    the operand.
/// \see { __tzcnt_u32 _tzcnt_u32 }
static __inline__ int __RELAXED_FN_ATTRS
_mm_tzcnt_32(unsigned int __X) {
  /* The builtin yields an unsigned value; convert it to the signed return
     type explicitly. */
  return (int)__builtin_ia32_tzcnt_u32(__X);
}
 91
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _tzcnt_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// Implemented as a macro that expands to \c __tzcnt_u32; the prototype above
/// only illustrates how it is called.
///
/// \param __X
///    An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see { _mm_tzcnt_32 __tzcnt_u32 }
#define _tzcnt_u32 __tzcnt_u32
108
109#ifdef __x86_64__
110
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// Declared with \c __RELAXED_FN_ATTRS, so it carries no "bmi" target
/// requirement and may be used when BMI is not enabled. Only declared when
/// \c __x86_64__ is defined.
///
/// \param __X
///    An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see { _mm_tzcnt_64 _tzcnt_u64 }
static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X) {
  /* Forwards directly to the compiler builtin. */
  return __builtin_ia32_tzcnt_u64(__X);
}
126
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// Same builtin as \c __tzcnt_u64, but the count is returned as a signed
/// \c long \c long. Only declared when \c __x86_64__ is defined.
///
/// \param __X
///    An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns A 64-bit integer containing the number of trailing zero bits in
///    the operand.
/// \see { __tzcnt_u64 _tzcnt_u64 }
static __inline__ long long __RELAXED_FN_ATTRS
_mm_tzcnt_64(unsigned long long __X) {
  /* The builtin yields an unsigned value; convert it to the signed return
     type explicitly. */
  return (long long)__builtin_ia32_tzcnt_u64(__X);
}
142
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _tzcnt_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// Implemented as a macro that expands to \c __tzcnt_u64; the prototype above
/// only illustrates how it is called.
///
/// \param __X
///    An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see { _mm_tzcnt_64 __tzcnt_u64 }
#define _tzcnt_u64 __tzcnt_u64
159
160#endif /* __x86_64__ */
161
162#undef __RELAXED_FN_ATTRS
163
/* Define the default attributes for the functions in this file: always
   inlined, marked __nodebug__, and compiled with the "bmi" target feature.
   When compiling as C++11 or later, the functions are additionally declared
   constexpr. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) constexpr
#else
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
#endif
172
173/// Performs a bitwise AND of the second operand with the one's
174///    complement of the first operand.
175///
176/// \headerfile <x86intrin.h>
177///
178/// This intrinsic corresponds to the \c ANDN instruction.
179///
180/// \param __X
181///    An unsigned integer containing one of the operands.
182/// \param __Y
183///    An unsigned integer containing one of the operands.
184/// \returns An unsigned integer containing the bitwise AND of the second
185///    operand with the one's complement of the first operand.
186/// \see _andn_u32
187static __inline__ unsigned int __DEFAULT_FN_ATTRS
188__andn_u32(unsigned int __X, unsigned int __Y) {
189  return ~__X & __Y;
190}
191
/// Performs a bitwise AND of the second operand with the one's
///    complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// Implemented as a macro that expands to \c __andn_u32; the prototype above
/// only illustrates how it is called.
///
/// \param __X
///    An unsigned integer containing one of the operands.
/// \param __Y
///    An unsigned integer containing one of the operands.
/// \returns An unsigned integer containing the bitwise AND of the second
///    operand with the one's complement of the first operand.
/// \see __andn_u32
#define _andn_u32 __andn_u32
211
/* AMD-specified, double-leading-underscore version of BEXTR */
/// Extracts the specified bits from the first operand and returns them
///    in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
///    An unsigned integer whose bits are to be extracted.
/// \param __Y
///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
///    specify the index of the least significant bit. Bits [15:8] specify the
///    number of bits to be extracted.
/// \returns An unsigned integer whose least significant bits contain the
///    extracted bits.
/// \see _bextr_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__bextr_u32(unsigned int __X, unsigned int __Y) {
  /* __Y is handed to the builtin unmodified as the packed control word. */
  return __builtin_ia32_bextr_u32(__X, __Y);
}
233
234/* Intel-specified, single-leading-underscore version of BEXTR */
235/// Extracts the specified bits from the first operand and returns them
236///    in the least significant bits of the result.
237///
238/// \headerfile <x86intrin.h>
239///
240/// This intrinsic corresponds to the \c BEXTR instruction.
241///
242/// \param __X
243///    An unsigned integer whose bits are to be extracted.
244/// \param __Y
245///    An unsigned integer used to specify the index of the least significant
246///    bit for the bits to be extracted. Bits [7:0] specify the index.
247/// \param __Z
248///    An unsigned integer used to specify the number of bits to be extracted.
249///    Bits [7:0] specify the number of bits.
250/// \returns An unsigned integer whose least significant bits contain the
251///    extracted bits.
252/// \see __bextr_u32
253static __inline__ unsigned int __DEFAULT_FN_ATTRS
254_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) {
255  return __builtin_ia32_bextr_u32(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
256}
257
/* Intel-specified, single-leading-underscore version of BEXTR2 */
/// Extracts the specified bits from the first operand and returns them
///    in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// Makes the same builtin call as \c __bextr_u32: the control operand is
/// already packed by the caller.
///
/// \param __X
///    An unsigned integer whose bits are to be extracted.
/// \param __Y
///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
///    specify the index of the least significant bit. Bits [15:8] specify the
///    number of bits to be extracted.
/// \returns An unsigned integer whose least significant bits contain the
///    extracted bits.
/// \see __bextr_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_bextr2_u32(unsigned int __X, unsigned int __Y) {
  /* __Y is handed to the builtin unmodified as the packed control word. */
  return __builtin_ia32_bextr_u32(__X, __Y);
}
279
280/// Clears all bits in the source except for the least significant bit
281///    containing a value of 1 and returns the result.
282///
283/// \headerfile <x86intrin.h>
284///
285/// This intrinsic corresponds to the \c BLSI instruction.
286///
287/// \param __X
288///    An unsigned integer whose bits are to be cleared.
289/// \returns An unsigned integer containing the result of clearing the bits from
290///    the source operand.
291/// \see _blsi_u32
292static __inline__ unsigned int __DEFAULT_FN_ATTRS
293__blsi_u32(unsigned int __X) {
294  return __X & -__X;
295}
296
/// Clears all bits in the source except for the least significant bit
///    containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsi_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// Implemented as a macro that expands to \c __blsi_u32; the prototype above
/// only illustrates how it is called.
///
/// \param __X
///    An unsigned integer whose bits are to be cleared.
/// \returns An unsigned integer containing the result of clearing the bits from
///    the source operand.
/// \see __blsi_u32
#define _blsi_u32 __blsi_u32
314
315/// Creates a mask whose bits are set to 1, using bit 0 up to and
316///    including the least significant bit that is set to 1 in the source
317///    operand and returns the result.
318///
319/// \headerfile <x86intrin.h>
320///
321/// This intrinsic corresponds to the \c BLSMSK instruction.
322///
323/// \param __X
324///    An unsigned integer used to create the mask.
325/// \returns An unsigned integer containing the newly created mask.
326/// \see _blsmsk_u32
327static __inline__ unsigned int __DEFAULT_FN_ATTRS
328__blsmsk_u32(unsigned int __X) {
329  return __X ^ (__X - 1);
330}
331
/// Creates a mask whose bits are set to 1, using bit 0 up to and
///    including the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsmsk_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// Implemented as a macro that expands to \c __blsmsk_u32; the prototype
/// above only illustrates how it is called.
///
/// \param __X
///    An unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
/// \see __blsmsk_u32
#define _blsmsk_u32 __blsmsk_u32
349
350/// Clears the least significant bit that is set to 1 in the source
351///    operand and returns the result.
352///
353/// \headerfile <x86intrin.h>
354///
355/// This intrinsic corresponds to the \c BLSR instruction.
356///
357/// \param __X
358///    An unsigned integer containing the operand to be cleared.
359/// \returns An unsigned integer containing the result of clearing the source
360///    operand.
361/// \see _blsr_u32
362static __inline__ unsigned int __DEFAULT_FN_ATTRS
363__blsr_u32(unsigned int __X) {
364  return __X & (__X - 1);
365}
366
/// Clears the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsr_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// Implemented as a macro that expands to \c __blsr_u32; the prototype above
/// only illustrates how it is called.
///
/// \param __X
///    An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
///    operand.
/// \see __blsr_u32
#define _blsr_u32 __blsr_u32
384
385#ifdef __x86_64__
386
387/// Performs a bitwise AND of the second operand with the one's
388///    complement of the first operand.
389///
390/// \headerfile <x86intrin.h>
391///
392/// This intrinsic corresponds to the \c ANDN instruction.
393///
394/// \param __X
395///    An unsigned 64-bit integer containing one of the operands.
396/// \param __Y
397///    An unsigned 64-bit integer containing one of the operands.
398/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
399///    operand with the one's complement of the first operand.
400/// \see _andn_u64
401static __inline__ unsigned long long __DEFAULT_FN_ATTRS
402__andn_u64 (unsigned long long __X, unsigned long long __Y) {
403  return ~__X & __Y;
404}
405
/// Performs a bitwise AND of the second operand with the one's
///    complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _andn_u64(unsigned long long __X,
///                              unsigned long long __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// Implemented as a macro that expands to \c __andn_u64; the prototype above
/// only illustrates how it is called.
///
/// \param __X
///    An unsigned 64-bit integer containing one of the operands.
/// \param __Y
///    An unsigned 64-bit integer containing one of the operands.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
///    operand with the one's complement of the first operand.
/// \see __andn_u64
#define _andn_u64 __andn_u64
426
/* AMD-specified, double-leading-underscore version of BEXTR */
/// Extracts the specified bits from the first operand and returns them
///    in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// \param __X
///    An unsigned 64-bit integer whose bits are to be extracted.
/// \param __Y
///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
///    the number of bits to be extracted.
/// \returns An unsigned 64-bit integer whose least significant bits contain the
///    extracted bits.
/// \see _bextr_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__bextr_u64(unsigned long long __X, unsigned long long __Y) {
  /* __Y is handed to the builtin unmodified as the packed control word. */
  return __builtin_ia32_bextr_u64(__X, __Y);
}
448
449/* Intel-specified, single-leading-underscore version of BEXTR */
450/// Extracts the specified bits from the first operand and returns them
451///     in the least significant bits of the result.
452///
453/// \headerfile <x86intrin.h>
454///
455/// This intrinsic corresponds to the \c BEXTR instruction.
456///
457/// \param __X
458///    An unsigned 64-bit integer whose bits are to be extracted.
459/// \param __Y
460///    An unsigned integer used to specify the index of the least significant
461///    bit for the bits to be extracted. Bits [7:0] specify the index.
462/// \param __Z
463///    An unsigned integer used to specify the number of bits to be extracted.
464///    Bits [7:0] specify the number of bits.
465/// \returns An unsigned 64-bit integer whose least significant bits contain the
466///    extracted bits.
467/// \see __bextr_u64
468static __inline__ unsigned long long __DEFAULT_FN_ATTRS
469_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) {
470  return __builtin_ia32_bextr_u64(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
471}
472
/* Intel-specified, single-leading-underscore version of BEXTR2 */
/// Extracts the specified bits from the first operand and returns them
///    in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c BEXTR instruction.
///
/// Makes the same builtin call as \c __bextr_u64: the control operand is
/// already packed by the caller.
///
/// \param __X
///    An unsigned 64-bit integer whose bits are to be extracted.
/// \param __Y
///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
///    the number of bits to be extracted.
/// \returns An unsigned 64-bit integer whose least significant bits contain the
///    extracted bits.
/// \see __bextr_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_bextr2_u64(unsigned long long __X, unsigned long long __Y) {
  /* __Y is handed to the builtin unmodified as the packed control word. */
  return __builtin_ia32_bextr_u64(__X, __Y);
}
494
495/// Clears all bits in the source except for the least significant bit
496///    containing a value of 1 and returns the result.
497///
498/// \headerfile <x86intrin.h>
499///
500/// This intrinsic corresponds to the \c BLSI instruction.
501///
502/// \param __X
503///    An unsigned 64-bit integer whose bits are to be cleared.
504/// \returns An unsigned 64-bit integer containing the result of clearing the
505///    bits from the source operand.
506/// \see _blsi_u64
507static __inline__ unsigned long long __DEFAULT_FN_ATTRS
508__blsi_u64(unsigned long long __X) {
509  return __X & -__X;
510}
511
/// Clears all bits in the source except for the least significant bit
///    containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsi_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// Implemented as a macro that expands to \c __blsi_u64; the prototype above
/// only illustrates how it is called.
///
/// \param __X
///    An unsigned 64-bit integer whose bits are to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
///    bits from the source operand.
/// \see __blsi_u64
#define _blsi_u64 __blsi_u64
529
530/// Creates a mask whose bits are set to 1, using bit 0 up to and
531///    including the least significant bit that is set to 1 in the source
532///    operand and returns the result.
533///
534/// \headerfile <x86intrin.h>
535///
536/// This intrinsic corresponds to the \c BLSMSK instruction.
537///
538/// \param __X
539///    An unsigned 64-bit integer used to create the mask.
540/// \returns An unsigned 64-bit integer containing the newly created mask.
541/// \see _blsmsk_u64
542static __inline__ unsigned long long __DEFAULT_FN_ATTRS
543__blsmsk_u64(unsigned long long __X) {
544  return __X ^ (__X - 1);
545}
546
/// Creates a mask whose bits are set to 1, using bit 0 up to and
///    including the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsmsk_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// Implemented as a macro that expands to \c __blsmsk_u64; the prototype
/// above only illustrates how it is called.
///
/// \param __X
///    An unsigned 64-bit integer used to create the mask.
/// \returns An unsigned 64-bit integer containing the newly created mask.
/// \see __blsmsk_u64
#define _blsmsk_u64 __blsmsk_u64
564
565/// Clears the least significant bit that is set to 1 in the source
566///    operand and returns the result.
567///
568/// \headerfile <x86intrin.h>
569///
570/// This intrinsic corresponds to the \c BLSR instruction.
571///
572/// \param __X
573///    An unsigned 64-bit integer containing the operand to be cleared.
574/// \returns An unsigned 64-bit integer containing the result of clearing the
575///    source operand.
576/// \see _blsr_u64
577static __inline__ unsigned long long __DEFAULT_FN_ATTRS
578__blsr_u64(unsigned long long __X) {
579  return __X & (__X - 1);
580}
581
/// Clears the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsr_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// Implemented as a macro that expands to \c __blsr_u64; the prototype above
/// only illustrates how it is called.
///
/// \param __X
///    An unsigned 64-bit integer containing the operand to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
///    source operand.
/// \see __blsr_u64
#define _blsr_u64 __blsr_u64
599
600#endif /* __x86_64__ */
601
602#undef __DEFAULT_FN_ATTRS
603
604#endif /* __BMIINTRIN_H */