/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
  9
 10#ifndef __IMMINTRIN_H
 11#error "Never use <bmi2intrin.h> directly; include <immintrin.h> instead."
 12#endif
 13
 14#ifndef __BMI2INTRIN_H
 15#define __BMI2INTRIN_H
 16
 17/* Define the default attributes for the functions in this file. */
 18#if defined(__cplusplus) && (__cplusplus >= 201103L)
 19#define __DEFAULT_FN_ATTRS                                                     \
 20  __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) constexpr
 21#else
 22#define __DEFAULT_FN_ATTRS                                                     \
 23  __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
 24#endif
 25
 26/// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
 27///    starting at bit number \a __Y.
 28///
 29/// \code{.operation}
 30/// i := __Y[7:0]
 31/// result := __X
 32/// IF i < 32
 33///   result[31:i] := 0
 34/// FI
 35/// \endcode
 36///
 37/// \headerfile <immintrin.h>
 38///
 39/// This intrinsic corresponds to the \c BZHI instruction.
 40///
 41/// \param __X
 42///    The 32-bit source value to copy.
 43/// \param __Y
 44///    The lower 8 bits specify the bit number of the lowest bit to zero.
 45/// \returns The partially zeroed 32-bit value.
 46static __inline__ unsigned int __DEFAULT_FN_ATTRS
 47_bzhi_u32(unsigned int __X, unsigned int __Y) {
 48  return __builtin_ia32_bzhi_si(__X, __Y);
 49}
 50
 51/// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X
 52///    into the 32-bit result, according to the mask in the unsigned 32-bit
 53///    integer \a __Y. All other bits of the result are zero.
 54///
 55/// \code{.operation}
 56/// i := 0
 57/// result := 0
 58/// FOR m := 0 TO 31
 59///   IF __Y[m] == 1
 60///     result[m] := __X[i]
 61///     i := i + 1
 62///   ENDIF
 63/// ENDFOR
 64/// \endcode
 65///
 66/// \headerfile <immintrin.h>
 67///
 68/// This intrinsic corresponds to the \c PDEP instruction.
 69///
 70/// \param __X
 71///    The 32-bit source value to copy.
 72/// \param __Y
 73///    The 32-bit mask specifying where to deposit source bits.
 74/// \returns The 32-bit result.
 75static __inline__ unsigned int __DEFAULT_FN_ATTRS
 76_pdep_u32(unsigned int __X, unsigned int __Y) {
 77  return __builtin_ia32_pdep_si(__X, __Y);
 78}
 79
 80/// Extract (gather) bits from the unsigned 32-bit integer \a __X into the
 81///    low-order bits of the 32-bit result, according to the mask in the
 82///    unsigned 32-bit integer \a __Y. All other bits of the result are zero.
 83///
 84/// \code{.operation}
 85/// i := 0
 86/// result := 0
 87/// FOR m := 0 TO 31
 88///   IF __Y[m] == 1
 89///     result[i] := __X[m]
 90///     i := i + 1
 91///   ENDIF
 92/// ENDFOR
 93/// \endcode
 94///
 95/// \headerfile <immintrin.h>
 96///
 97/// This intrinsic corresponds to the \c PEXT instruction.
 98///
 99/// \param __X
100///    The 32-bit source value to copy.
101/// \param __Y
102///    The 32-bit mask specifying which source bits to extract.
103/// \returns The 32-bit result.
104static __inline__ unsigned int __DEFAULT_FN_ATTRS
105_pext_u32(unsigned int __X, unsigned int __Y) {
106  return __builtin_ia32_pext_si(__X, __Y);
107}
108
109/// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a
110///    64-bit product. Stores the upper 32 bits of the product in the
111///    memory at \a __P and returns the lower 32 bits.
112///
113/// \code{.operation}
114/// Store32(__P, (__X * __Y)[63:32])
115/// result := (__X * __Y)[31:0]
116/// \endcode
117///
118/// \headerfile <immintrin.h>
119///
120/// This intrinsic corresponds to the \c MULX instruction.
121///
122/// \param __X
123///    An unsigned 32-bit multiplicand.
124/// \param __Y
125///    An unsigned 32-bit multiplicand.
126/// \param __P
127///    A pointer to memory for storing the upper half of the product.
128/// \returns The lower half of the product.
129static __inline__ unsigned int __DEFAULT_FN_ATTRS
130_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) {
131  unsigned long long __res = (unsigned long long) __X * __Y;
132  *__P = (unsigned int)(__res >> 32);
133  return (unsigned int)__res;
134}
135
136#ifdef  __x86_64__
137
138/// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits
139///    starting at bit number \a __Y.
140///
141/// \code{.operation}
142/// i := __Y[7:0]
143/// result := __X
144/// IF i < 64
145///   result[63:i] := 0
146/// FI
147/// \endcode
148///
149/// \headerfile <immintrin.h>
150///
151/// This intrinsic corresponds to the \c BZHI instruction.
152///
153/// \param __X
154///    The 64-bit source value to copy.
155/// \param __Y
156///    The lower 8 bits specify the bit number of the lowest bit to zero.
157/// \returns The partially zeroed 64-bit value.
158static __inline__ unsigned long long __DEFAULT_FN_ATTRS
159_bzhi_u64(unsigned long long __X, unsigned long long __Y) {
160  return __builtin_ia32_bzhi_di(__X, __Y);
161}
162
163/// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X
164///    into the 64-bit result, according to the mask in the unsigned 64-bit
165///    integer \a __Y. All other bits of the result are zero.
166///
167/// \code{.operation}
168/// i := 0
169/// result := 0
170/// FOR m := 0 TO 63
171///   IF __Y[m] == 1
172///     result[m] := __X[i]
173///     i := i + 1
174///   ENDIF
175/// ENDFOR
176/// \endcode
177///
178/// \headerfile <immintrin.h>
179///
180/// This intrinsic corresponds to the \c PDEP instruction.
181///
182/// \param __X
183///    The 64-bit source value to copy.
184/// \param __Y
185///    The 64-bit mask specifying where to deposit source bits.
186/// \returns The 64-bit result.
187static __inline__ unsigned long long __DEFAULT_FN_ATTRS
188_pdep_u64(unsigned long long __X, unsigned long long __Y) {
189  return __builtin_ia32_pdep_di(__X, __Y);
190}
191
192/// Extract (gather) bits from the unsigned 64-bit integer \a __X into the
193///    low-order bits of the 64-bit result, according to the mask in the
194///    unsigned 64-bit integer \a __Y. All other bits of the result are zero.
195///
196/// \code{.operation}
197/// i := 0
198/// result := 0
199/// FOR m := 0 TO 63
200///   IF __Y[m] == 1
201///     result[i] := __X[m]
202///     i := i + 1
203///   ENDIF
204/// ENDFOR
205/// \endcode
206///
207/// \headerfile <immintrin.h>
208///
209/// This intrinsic corresponds to the \c PEXT instruction.
210///
211/// \param __X
212///    The 64-bit source value to copy.
213/// \param __Y
214///    The 64-bit mask specifying which source bits to extract.
215/// \returns The 64-bit result.
216static __inline__ unsigned long long __DEFAULT_FN_ATTRS
217_pext_u64(unsigned long long __X, unsigned long long __Y) {
218  return __builtin_ia32_pext_di(__X, __Y);
219}
220
221/// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a
222///    128-bit product. Stores the upper 64 bits of the product to the
223///    memory addressed by \a __P and returns the lower 64 bits.
224///
225/// \code{.operation}
226/// Store64(__P, (__X * __Y)[127:64])
227/// result := (__X * __Y)[63:0]
228/// \endcode
229///
230/// \headerfile <immintrin.h>
231///
232/// This intrinsic corresponds to the \c MULX instruction.
233///
234/// \param __X
235///    An unsigned 64-bit multiplicand.
236/// \param __Y
237///    An unsigned 64-bit multiplicand.
238/// \param __P
239///    A pointer to memory for storing the upper half of the product.
240/// \returns The lower half of the product.
241static __inline__ unsigned long long __DEFAULT_FN_ATTRS
242_mulx_u64 (unsigned long long __X, unsigned long long __Y,
243           unsigned long long *__P) {
244  unsigned __int128 __res = (unsigned __int128) __X * __Y;
245  *__P = (unsigned long long) (__res >> 64);
246  return (unsigned long long) __res;
247}
248
249#endif /* __x86_64__  */
250
251#undef __DEFAULT_FN_ATTRS
252
253#endif /* __BMI2INTRIN_H */