master
  1/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
  2 *
  3 *
  4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  5 * See https://llvm.org/LICENSE.txt for license information.
  6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7 *
  8 *===-----------------------------------------------------------------------===
  9 */
 10#ifndef __IMMINTRIN_H
 11#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
 12#endif
 13
 14#ifndef __VBMIVLINTRIN_H
 15#define __VBMIVLINTRIN_H
 16
 17/* Define the default attributes for the functions in this file. */
 18#define __DEFAULT_FN_ATTRS128                                                  \
 19  __attribute__((__always_inline__, __nodebug__,                               \
 20                 __target__("avx512vbmi,avx512vl,no-evex512"),                 \
 21                 __min_vector_width__(128)))
 22#define __DEFAULT_FN_ATTRS256                                                  \
 23  __attribute__((__always_inline__, __nodebug__,                               \
 24                 __target__("avx512vbmi,avx512vl,no-evex512"),                 \
 25                 __min_vector_width__(256)))
 26
 27static __inline__ __m128i __DEFAULT_FN_ATTRS128
 28_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
 29{
 30  return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
 31                                                 (__v16qi)__I,
 32                                                 (__v16qi)__B);
 33}
 34
 35static __inline__ __m128i __DEFAULT_FN_ATTRS128
 36_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
 37                           __m128i __B)
 38{
 39  return (__m128i)__builtin_ia32_selectb_128(__U,
 40                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
 41                                  (__v16qi)__A);
 42}
 43
 44static __inline__ __m128i __DEFAULT_FN_ATTRS128
 45_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
 46                            __m128i __B)
 47{
 48  return (__m128i)__builtin_ia32_selectb_128(__U,
 49                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
 50                                  (__v16qi)__I);
 51}
 52
 53static __inline__ __m128i __DEFAULT_FN_ATTRS128
 54_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
 55                            __m128i __B)
 56{
 57  return (__m128i)__builtin_ia32_selectb_128(__U,
 58                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
 59                                  (__v16qi)_mm_setzero_si128());
 60}
 61
 62static __inline__ __m256i __DEFAULT_FN_ATTRS256
 63_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
 64{
 65  return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
 66                                                 (__v32qi)__B);
 67}
 68
 69static __inline__ __m256i __DEFAULT_FN_ATTRS256
 70_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
 71                              __m256i __B)
 72{
 73  return (__m256i)__builtin_ia32_selectb_256(__U,
 74                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
 75                               (__v32qi)__A);
 76}
 77
 78static __inline__ __m256i __DEFAULT_FN_ATTRS256
 79_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
 80                               __m256i __B)
 81{
 82  return (__m256i)__builtin_ia32_selectb_256(__U,
 83                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
 84                               (__v32qi)__I);
 85}
 86
 87static __inline__ __m256i __DEFAULT_FN_ATTRS256
 88_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
 89                               __m256i __B)
 90{
 91  return (__m256i)__builtin_ia32_selectb_256(__U,
 92                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
 93                               (__v32qi)_mm256_setzero_si256());
 94}
 95
 96static __inline__ __m128i __DEFAULT_FN_ATTRS128
 97_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
 98{
 99  return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
100}
101
102static __inline__ __m128i __DEFAULT_FN_ATTRS128
103_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
104{
105  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
106                                        (__v16qi)_mm_permutexvar_epi8(__A, __B),
107                                        (__v16qi)_mm_setzero_si128());
108}
109
110static __inline__ __m128i __DEFAULT_FN_ATTRS128
111_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
112          __m128i __B)
113{
114  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
115                                        (__v16qi)_mm_permutexvar_epi8(__A, __B),
116                                        (__v16qi)__W);
117}
118
119static __inline__ __m256i __DEFAULT_FN_ATTRS256
120_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
121{
122  return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
123}
124
125static __inline__ __m256i __DEFAULT_FN_ATTRS256
126_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
127        __m256i __B)
128{
129  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
130                                     (__v32qi)_mm256_permutexvar_epi8(__A, __B),
131                                     (__v32qi)_mm256_setzero_si256());
132}
133
134static __inline__ __m256i __DEFAULT_FN_ATTRS256
135_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
136             __m256i __B)
137{
138  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
139                                     (__v32qi)_mm256_permutexvar_epi8(__A, __B),
140                                     (__v32qi)__W);
141}
142
143static __inline__ __m128i __DEFAULT_FN_ATTRS128
144_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y)
145{
146  return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y);
147}
148
149static __inline__ __m128i __DEFAULT_FN_ATTRS128
150_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
151                               __m128i __Y)
152{
153  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
154                                   (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
155                                   (__v16qi)__W);
156}
157
158static __inline__ __m128i __DEFAULT_FN_ATTRS128
159_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y)
160{
161  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
162                                   (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
163                                   (__v16qi)_mm_setzero_si128());
164}
165
166static __inline__ __m256i __DEFAULT_FN_ATTRS256
167_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y)
168{
169  return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y);
170}
171
172static __inline__ __m256i __DEFAULT_FN_ATTRS256
173_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
174                                  __m256i __Y)
175{
176  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
177                                (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
178                                (__v32qi)__W);
179}
180
181static __inline__ __m256i __DEFAULT_FN_ATTRS256
182_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
183{
184  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
185                                (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
186                                (__v32qi)_mm256_setzero_si256());
187}
188
189
190#undef __DEFAULT_FN_ATTRS128
191#undef __DEFAULT_FN_ATTRS256
192
193#endif